38#include "llvm/IR/IntrinsicsAMDGPU.h"
39#include "llvm/IR/IntrinsicsR600.h"
47#define DEBUG_TYPE "si-lower"
52 "amdgpu-disable-loop-alignment",
53 cl::desc(
"Do not align and prefetch loops"),
57 "amdgpu-use-divergent-register-indexing",
59 cl::desc(
"Use indirect register addressing for divergent indexes"),
73 unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
74 for (
unsigned Reg = 0; Reg < NumSGPRs; ++
Reg) {
76 return AMDGPU::SGPR0 +
Reg;
177 {MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
178 MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v9i32,
179 MVT::v10i32, MVT::v11i32, MVT::v12i32, MVT::v16i32,
180 MVT::i1, MVT::v32i32},
184 {MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
185 MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v9i32,
186 MVT::v10i32, MVT::v11i32, MVT::v12i32, MVT::v16i32,
187 MVT::i1, MVT::v32i32},
225 {MVT::f32, MVT::i32, MVT::i64, MVT::f64, MVT::i1},
Expand);
232 {MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
233 MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v9i32,
234 MVT::v10i32, MVT::v11i32, MVT::v12i32, MVT::v16i32},
237 {MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,
238 MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v9f32,
239 MVT::v10f32, MVT::v11f32, MVT::v12f32, MVT::v16f32},
243 {MVT::v2i1, MVT::v4i1, MVT::v2i8, MVT::v4i8, MVT::v2i16,
244 MVT::v3i16, MVT::v4i16, MVT::Other},
249 {MVT::i1, MVT::i32, MVT::i64, MVT::f32, MVT::f64},
Expand);
265 {MVT::v8i32, MVT::v8f32, MVT::v9i32, MVT::v9f32, MVT::v10i32,
266 MVT::v10f32, MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32,
267 MVT::v16i32, MVT::v16f32, MVT::v2i64, MVT::v2f64, MVT::v4i16,
268 MVT::v4f16, MVT::v3i64, MVT::v3f64, MVT::v6i32, MVT::v6f32,
269 MVT::v4i64, MVT::v4f64, MVT::v8i64, MVT::v8f64, MVT::v8i16,
270 MVT::v8f16, MVT::v16i16, MVT::v16f16, MVT::v16i64, MVT::v16f64,
271 MVT::v32i32, MVT::v32f32}) {
303 for (
MVT Vec64 : { MVT::v2i64, MVT::v2f64 }) {
317 for (
MVT Vec64 : { MVT::v3i64, MVT::v3f64 }) {
331 for (
MVT Vec64 : { MVT::v4i64, MVT::v4f64 }) {
345 for (
MVT Vec64 : { MVT::v8i64, MVT::v8f64 }) {
359 for (
MVT Vec64 : { MVT::v16i64, MVT::v16f64 }) {
374 {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32},
382 {MVT::v2i16, MVT::v2f16, MVT::v2i8, MVT::v4i8, MVT::v8i8,
383 MVT::v4i16, MVT::v4f16},
388 {MVT::v3i32, MVT::v3f32, MVT::v4i32, MVT::v4f32},
Custom);
392 {MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32,
393 MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32,
394 MVT::v9i32, MVT::v9f32, MVT::v10i32, MVT::v10f32,
395 MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32},
470 {MVT::f32, MVT::f64},
Legal);
547 for (
MVT VT : {MVT::v2i16, MVT::v2f16, MVT::v4i16, MVT::v4f16, MVT::v8i16,
548 MVT::v8f16, MVT::v16i16, MVT::v16f16}) {
656 {MVT::v4f16, MVT::v8f16, MVT::v16f16},
Custom);
659 {MVT::v4f16, MVT::v8f16, MVT::v16f16},
Expand);
661 for (
MVT Vec16 : {MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16}) {
683 {MVT::v4f16, MVT::v4i16, MVT::v8f16, MVT::v8i16,
684 MVT::v16f16, MVT::v16i16},
687 for (
MVT VT : {MVT::v4i16, MVT::v8i16, MVT::v16i16})
695 for (
MVT VT : {MVT::v4f16, MVT::v8f16, MVT::v16f16})
710 {MVT::v4f32, MVT::v8f32, MVT::v16f32, MVT::v32f32},
730 {MVT::v4i16, MVT::v4f16, MVT::v2i8, MVT::v4i8, MVT::v8i8,
731 MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16},
740 {MVT::Other, MVT::f32, MVT::v4f32, MVT::i16, MVT::f16,
741 MVT::v2i16, MVT::v2f16, MVT::i128},
745 {MVT::v2f16, MVT::v2i16, MVT::v3f16, MVT::v3i16,
746 MVT::v4f16, MVT::v4i16, MVT::v8f16, MVT::Other, MVT::f16,
747 MVT::i16, MVT::i8, MVT::i128},
751 {MVT::Other, MVT::v2i16, MVT::v2f16, MVT::v3i16,
752 MVT::v3f16, MVT::v4f16, MVT::v4i16, MVT::f16, MVT::i16,
835 DestVT.getScalarType() == MVT::f32 &&
836 SrcVT.getScalarType() == MVT::f16 &&
843 return ((Opcode == TargetOpcode::G_FMAD && Subtarget->
hasMadMixInsts()) ||
844 (Opcode == TargetOpcode::G_FMA && Subtarget->
hasFmaMixInsts())) &&
845 DestTy.getScalarSizeInBits() == 32 &&
846 SrcTy.getScalarSizeInBits() == 16 &&
870 return (
ScalarVT == MVT::bf16 ? MVT::i32 : MVT::v2f16);
872 return VT.
isInteger() ? MVT::i32 : MVT::f32;
899 return (NumElts + 1) / 2;
905 return NumElts * ((
Size + 31) / 32);
974 unsigned NumElts = std::min(
MaxNumLanes, VT->getNumElements());
990 assert(ST->getNumContainedTypes() == 2 &&
991 ST->getContainedType(1)->isIntegerTy(32));
1018 unsigned IntrID)
const {
1020 if (CI.
hasMetadata(LLVMContext::MD_invariant_load))
1045 Info.ptrVal = RsrcArg;
1099 case Intrinsic::amdgcn_raw_buffer_load_lds:
1100 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
1101 case Intrinsic::amdgcn_struct_buffer_load_lds:
1102 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
1113 case Intrinsic::amdgcn_ds_ordered_add:
1114 case Intrinsic::amdgcn_ds_ordered_swap:
1115 case Intrinsic::amdgcn_ds_fadd:
1116 case Intrinsic::amdgcn_ds_fmin:
1117 case Intrinsic::amdgcn_ds_fmax: {
1130 case Intrinsic::amdgcn_buffer_atomic_fadd: {
1138 if (!
Vol || !
Vol->isZero())
1143 case Intrinsic::amdgcn_ds_add_gs_reg_rtn:
1144 case Intrinsic::amdgcn_ds_sub_gs_reg_rtn: {
1147 Info.ptrVal =
nullptr;
1152 case Intrinsic::amdgcn_ds_append:
1153 case Intrinsic::amdgcn_ds_consume: {
1166 case Intrinsic::amdgcn_global_atomic_csub: {
1176 case Intrinsic::amdgcn_image_bvh_intersect_ray: {
1186 case Intrinsic::amdgcn_global_atomic_fadd:
1187 case Intrinsic::amdgcn_global_atomic_fmin:
1188 case Intrinsic::amdgcn_global_atomic_fmax:
1189 case Intrinsic::amdgcn_flat_atomic_fadd:
1190 case Intrinsic::amdgcn_flat_atomic_fmin:
1191 case Intrinsic::amdgcn_flat_atomic_fmax:
1192 case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
1193 case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16: {
1204 case Intrinsic::amdgcn_ds_gws_init:
1205 case Intrinsic::amdgcn_ds_gws_barrier:
1206 case Intrinsic::amdgcn_ds_gws_sema_v:
1207 case Intrinsic::amdgcn_ds_gws_sema_br:
1208 case Intrinsic::amdgcn_ds_gws_sema_p:
1209 case Intrinsic::amdgcn_ds_gws_sema_release_all: {
1219 Info.memVT = MVT::i32;
1221 Info.align =
Align(4);
1223 if (IntrID == Intrinsic::amdgcn_ds_gws_barrier)
1229 case Intrinsic::amdgcn_global_load_lds: {
1237 case Intrinsic::amdgcn_ds_bvh_stack_rtn: {
1247 Info.memVT = MVT::i32;
1249 Info.align =
Align(4);
1261 Type *&AccessTy)
const {
1263 case Intrinsic::amdgcn_ds_ordered_add:
1264 case Intrinsic::amdgcn_ds_ordered_swap:
1265 case Intrinsic::amdgcn_ds_append:
1266 case Intrinsic::amdgcn_ds_consume:
1267 case Intrinsic::amdgcn_ds_fadd:
1268 case Intrinsic::amdgcn_ds_fmin:
1269 case Intrinsic::amdgcn_ds_fmax:
1270 case Intrinsic::amdgcn_global_atomic_fadd:
1271 case Intrinsic::amdgcn_flat_atomic_fadd:
1272 case Intrinsic::amdgcn_flat_atomic_fmin:
1273 case Intrinsic::amdgcn_flat_atomic_fmax:
1274 case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
1275 case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16:
1276 case Intrinsic::amdgcn_global_atomic_csub: {
1287bool SITargetLowering::isLegalFlatAddressingMode(
const AddrMode &AM)
const {
1291 return AM.BaseOffs == 0 && AM.Scale == 0;
1294 return AM.Scale == 0 &&
1295 (AM.BaseOffs == 0 ||
1302 return AM.
Scale == 0 &&
1317 return isLegalFlatAddressingMode(AM);
1320 return isLegalMUBUFAddressingMode(AM);
1323bool SITargetLowering::isLegalMUBUFAddressingMode(
const AddrMode &AM)
const {
1345 if (AM.HasBaseReg) {
1375 return isLegalMUBUFAddressingMode(AM);
1381 if (Ty->isSized() &&
DL.getTypeStoreSize(Ty) < 4)
1414 return isLegalMUBUFAddressingMode(AM);
1439 return isLegalFlatAddressingMode(AM);
1449 return (
MemVT.getSizeInBits() <= 4 * 32);
1454 return (
MemVT.getSizeInBits() <= 2 * 32);
1460 unsigned Size,
unsigned AddrSpace,
Align Alignment,
1514 : (Alignment <
Align(4)) ? 32
1537 : (Alignment <
Align(4)) ? 32
1562 : (Alignment <
Align(4)) ? 32
1613 return Alignment >=
Align(4) ||
1627 return Size >= 32 && Alignment >=
Align(4);
1632 unsigned *
IsFast)
const {
1634 Alignment, Flags,
IsFast);
1644 if (
Op.size() >= 16 &&
1648 if (
Op.size() >= 8 &&
Op.isDstAligned(
Align(4)))
1698 unsigned Index)
const {
1761 const SDLoc &SL)
const {
1768 const SDLoc &SL)
const {
1793 if (
Arg && (
Arg->Flags.isSExt() ||
Arg->Flags.isZExt()) &&
1799 if (
MemVT.isFloatingPoint())
1800 Val = getFPExtOrFPRound(DAG, Val, SL, VT);
1809SDValue SITargetLowering::lowerKernargMemParameter(
1818 if (
MemVT.getStoreSize() < 4 && Alignment < 4) {
1858 if (
Arg.Flags.isByVal()) {
1859 unsigned Size =
Arg.Flags.getByValSize();
1865 unsigned ArgSize =
VA.getValVT().getStoreSize();
1877 switch (
VA.getLocInfo()) {
1895 ExtType, SL,
VA.getLocVT(), Chain,
FIN,
1934 assert((!
Arg->VT.isVector() ||
Arg->VT.getScalarSizeInBits() == 16) &&
1935 "vector type argument should have been split");
1945 if (
Arg->Flags.isSplit()) {
1946 while (!
Arg->Flags.isSplitEnd()) {
1948 Arg->VT.getScalarSizeInBits() == 16) &&
1949 "unexpected vector split in ps argument type");
1982 if (Info.hasWorkItemIDX()) {
1984 MRI.setType(MF.
addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
1988 Info.hasWorkItemIDY()) ? 0x3ff : ~0u;
1992 if (Info.hasWorkItemIDY()) {
1993 assert(Info.hasWorkItemIDX());
1998 unsigned Reg = AMDGPU::VGPR1;
1999 MRI.setType(MF.
addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
2006 if (Info.hasWorkItemIDZ()) {
2007 assert(Info.hasWorkItemIDX() && Info.hasWorkItemIDY());
2012 unsigned Reg = AMDGPU::VGPR2;
2013 MRI.setType(MF.
addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
2041 assert(Reg != AMDGPU::NoRegister);
2059 assert(Reg != AMDGPU::NoRegister);
2073 assert(Reg != AMDGPU::NoRegister);
2099 const unsigned Mask = 0x3ff;
2102 if (Info.hasWorkItemIDX()) {
2104 Info.setWorkItemIDX(
Arg);
2107 if (Info.hasWorkItemIDY()) {
2109 Info.setWorkItemIDY(
Arg);
2112 if (Info.hasWorkItemIDZ())
2124 const unsigned Mask = 0x3ff;
2135 auto &
ArgInfo = Info.getArgInfo();
2138 if (Info.hasDispatchPtr())
2142 if (Info.hasQueuePtr() &&
2148 if (Info.hasImplicitArgPtr())
2151 if (Info.hasDispatchID())
2156 if (Info.hasWorkGroupIDX())
2159 if (Info.hasWorkGroupIDY())
2162 if (Info.hasWorkGroupIDZ())
2165 if (Info.hasLDSKernelId())
2174 if (Info.hasImplicitBufferPtr()) {
2181 if (Info.hasPrivateSegmentBuffer()) {
2187 if (Info.hasDispatchPtr()) {
2194 if (Info.hasQueuePtr() &&
2201 if (Info.hasKernargSegmentPtr()) {
2210 if (Info.hasDispatchID()) {
2216 if (Info.hasFlatScratchInit() && !
getSubtarget()->isAmdPalOS()) {
2222 if (Info.hasLDSKernelId()) {
2223 Register Reg = Info.addLDSKernelId();
2224 MF.
addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
2245 assert(!HasArchitectedSGPRs &&
"Unhandled feature for the subtarget");
2252 Info.hasWorkGroupIDY() +
2253 Info.hasWorkGroupIDZ() +
2254 Info.hasWorkGroupInfo();
2256 Register Reg = Info.addReservedUserSGPR();
2257 MF.
addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
2262 if (Info.hasWorkGroupIDX()) {
2263 Register Reg = Info.addWorkGroupIDX(HasArchitectedSGPRs);
2264 if (!HasArchitectedSGPRs)
2265 MF.
addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
2270 if (Info.hasWorkGroupIDY()) {
2271 Register Reg = Info.addWorkGroupIDY(HasArchitectedSGPRs);
2272 if (!HasArchitectedSGPRs)
2273 MF.
addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
2278 if (Info.hasWorkGroupIDZ()) {
2279 Register Reg = Info.addWorkGroupIDZ(HasArchitectedSGPRs);
2280 if (!HasArchitectedSGPRs)
2281 MF.
addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
2286 if (Info.hasWorkGroupInfo()) {
2287 Register Reg = Info.addWorkGroupInfo();
2288 MF.
addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
2292 if (Info.hasPrivateSegmentWaveByteOffset()) {
2298 Info.getPrivateSegmentWaveByteOffsetSystemSGPR();
2314 Info.getNumPreloadedSGPRs() >= 16);
2330 Info.setHasNonSpillStackObjects(
true);
2341 if (!ST.enableFlatScratch()) {
2380 if (!
MRI.isLiveIn(AMDGPU::SGPR32)) {
2381 Info.setStackPtrOffsetReg(AMDGPU::SGPR32);
2388 for (
unsigned Reg : AMDGPU::SGPR_32RegClass) {
2389 if (!
MRI.isLiveIn(Reg)) {
2390 Info.setStackPtrOffsetReg(Reg);
2395 if (Info.getStackPtrOffsetReg() == AMDGPU::SP_REG)
2402 if (ST.getFrameLowering()->hasFP(MF)) {
2403 Info.setFrameOffsetReg(AMDGPU::SGPR33);
2430 if (AMDGPU::SReg_64RegClass.
contains(*
I))
2431 RC = &AMDGPU::SGPR_64RegClass;
2432 else if (AMDGPU::SReg_32RegClass.
contains(*
I))
2433 RC = &AMDGPU::SGPR_32RegClass;
2439 Entry->addLiveIn(*
I);
2444 for (
auto *Exit : Exits)
2446 TII->get(TargetOpcode::COPY), *
I)
2464 Fn,
"unsupported non-compute shaders with HSA",
DL.getDebugLoc());
2480 assert(!Info->hasDispatchPtr() && !Info->hasKernargSegmentPtr() &&
2481 !Info->hasWorkGroupInfo() && !Info->hasLDSKernelId() &&
2482 !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() &&
2483 !Info->hasWorkItemIDZ());
2485 assert(!Info->hasFlatScratchInit());
2487 assert(!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
2488 !Info->hasWorkGroupIDZ());
2507 if ((Info->getPSInputAddr() & 0x7F) == 0 ||
2508 ((Info->getPSInputAddr() & 0xF) == 0 && Info->isPSInputAllocated(11))) {
2511 Info->markPSInputAllocated(0);
2512 Info->markPSInputEnabled(0);
2523 unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
2529 assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
2531 Splits.append(Ins.begin(), Ins.end());
2537 }
else if (!IsGraphics) {
2558 for (
unsigned i = 0, e = Ins.size(),
ArgIdx = 0; i != e; ++i) {
2566 MVT VT =
VA.getLocVT();
2575 if (
Arg.Flags.isByRef()) {
2581 Arg.Flags.getPointerAddrSpace())) {
2583 Arg.Flags.getPointerAddrSpace());
2586 InVals.push_back(
Ptr);
2591 DAG, VT,
MemVT,
DL, Chain,
Offset, Alignment, Ins[i].Flags.isSExt(), &Ins[i]);
2592 Chains.push_back(
Arg.getValue(1));
2606 InVals.push_back(
Arg);
2610 InVals.push_back(Val);
2611 if (!
Arg.Flags.isByVal())
2616 assert(
VA.isRegLoc() &&
"Parameter must be in a register!");
2620 if (AMDGPU::VGPR_32RegClass.
contains(Reg))
2621 RC = &AMDGPU::VGPR_32RegClass;
2622 else if (AMDGPU::SGPR_32RegClass.
contains(Reg))
2623 RC = &AMDGPU::SGPR_32RegClass;
2626 EVT ValVT =
VA.getValVT();
2631 if (
Arg.Flags.isSRet()) {
2646 switch (
VA.getLocInfo()) {
2669 InVals.push_back(Val);
2688 return Chains.empty() ? Chain :
2712 unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
2714 if (CCInfo.
isAllocated(AMDGPU::VGPR_32RegClass.getRegister(i)))
2736 Info->setIfReturnsVoid(Outs.empty());
2758 assert(
VA.isRegLoc() &&
"Can only return in registers!");
2763 switch (
VA.getLocInfo()) {
2788 if (!Info->isEntryFunction()) {
2794 if (AMDGPU::SReg_64RegClass.
contains(*
I))
2796 else if (AMDGPU::SReg_32RegClass.
contains(*
I))
2829 for (
unsigned i = 0; i !=
RVLocs.size(); ++i) {
2833 if (
VA.isRegLoc()) {
2837 }
else if (
VA.isMemLoc()) {
2842 switch (
VA.getLocInfo()) {
2865 InVals.push_back(Val);
2939 EVT ArgVT =
TRI->getSpillSize(*
ArgRC) == 8 ? MVT::i64 : MVT::i32;
2949 std::optional<uint32_t> Id =
2951 if (Id.has_value()) {
3092 if (
Callee->isDivergent())
3119 if (
Arg.hasByValAttr())
3187 for (
unsigned I = 0,
E = CLI.
Ins.size();
I !=
E; ++
I)
3196 "unsupported call to variadic function ");
3204 "unsupported required tail call to function ");
3209 Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
3212 "site marked musttail");
3270 RegsToPass.emplace_back(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg);
3279 for (
unsigned i = 0, e =
ArgLocs.size(); i != e; ++i) {
3284 switch (
VA.getLocInfo()) {
3306 if (
VA.isRegLoc()) {
3322 unsigned OpSize = Flags.isByVal() ?
3323 Flags.getByValSize() :
VA.getValVT().getStoreSize();
3328 ? Flags.getNonZeroByValAlign()
3355 if (Outs[i].Flags.isByVal()) {
3357 DAG.
getConstant(Outs[i].Flags.getByValSize(),
DL, MVT::i32);
3360 Outs[i].Flags.getNonZeroByValAlign(),
3396 std::vector<SDValue> Ops;
3397 Ops.push_back(Chain);
3425 const uint32_t *Mask =
TRI->getCallPreservedMask(MF, CallConv);
3426 assert(Mask &&
"Missing call preserved mask for calling convention");
3445 Chain = Call.getValue(0);
3446 InGlue = Call.getValue(1);
3468 EVT VT =
Op.getValueType();
3474 Register SPReg = Info->getStackPtrOffsetReg();
3492 DAG.
getConstant(ST.getWavefrontSizeLog2(), dl, MVT::i32));
3494 Align StackAlign = TFL->getStackAlign();
3496 if (Alignment && *Alignment > StackAlign) {
3499 << ST.getWavefrontSizeLog2(),
3526 .Case(
"m0", AMDGPU::M0)
3527 .Case(
"exec", AMDGPU::EXEC)
3528 .Case(
"exec_lo", AMDGPU::EXEC_LO)
3529 .Case(
"exec_hi", AMDGPU::EXEC_HI)
3530 .Case(
"flat_scratch", AMDGPU::FLAT_SCR)
3531 .Case(
"flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
3532 .Case(
"flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
3535 if (Reg == AMDGPU::NoRegister) {
3549 case AMDGPU::EXEC_LO:
3550 case AMDGPU::EXEC_HI:
3551 case AMDGPU::FLAT_SCR_LO:
3552 case AMDGPU::FLAT_SCR_HI:
3557 case AMDGPU::FLAT_SCR:
3576 MI.setDesc(
TII->getKillTerminatorFromPseudo(
MI.getOpcode()));
3585static std::pair<MachineBasicBlock *, MachineBasicBlock *>
3607 auto Next = std::next(
I);
3627 auto I =
MI.getIterator();
3628 auto E = std::next(
I);
3650 Src->setIsKill(
false);
3666 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
3704 Register CondReg =
MRI.createVirtualRegister(BoolRC);
3729 : AMDGPU::S_AND_SAVEEXEC_B64),
3739 SGPRIdxReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
3757 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
3760 : AMDGPU::S_XOR_B64_term), Exec)
3790 const auto *
BoolXExecRC =
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
3794 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
3795 unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
3828static std::pair<unsigned, int>
3833 int NumElts =
TRI.getRegSizeInBits(*
SuperRC) / 32;
3838 return std::pair(AMDGPU::sub0,
Offset);
3852 assert(
Idx->getReg() != AMDGPU::NoRegister);
3873 return Idx->getReg();
3875 Register Tmp =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
3892 Register SrcReg =
TII->getNamedOperand(
MI, AMDGPU::OpName::src)->getReg();
3893 int Offset =
TII->getNamedOperand(
MI, AMDGPU::OpName::offset)->getImm();
3905 if (
TII->getRegisterInfo().isSGPRClass(
IdxRC)) {
3916 TII->getIndirectGPRIDXPseudo(
TRI.getRegSizeInBits(*
VecRC),
true);
3929 MI.eraseFromParent();
3951 TII->getIndirectGPRIDXPseudo(
TRI.getRegSizeInBits(*
VecRC),
true);
3963 MI.eraseFromParent();
3980 int Offset =
TII->getNamedOperand(
MI, AMDGPU::OpName::offset)->getImm();
3993 if (
Idx->getReg() == AMDGPU::NoRegister) {
4004 MI.eraseFromParent();
4009 if (
TII->getRegisterInfo().isSGPRClass(
IdxRC)) {
4017 TII->getIndirectGPRIDXPseudo(
TRI.getRegSizeInBits(*
VecRC),
false);
4027 TRI.getRegSizeInBits(*
VecRC), 32,
false);
4033 MI.eraseFromParent();
4052 TII->getIndirectGPRIDXPseudo(
TRI.getRegSizeInBits(*
VecRC),
false);
4061 TRI.getRegSizeInBits(*
VecRC), 32,
false);
4068 MI.eraseFromParent();
4083 bool isSGPR =
TRI->isSGPRClass(
MRI.getRegClass(SrcReg));
4121 bool IsWave32 = ST.isWave32();
4122 unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
4123 unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
4128 (Opc == AMDGPU::S_MIN_U32) ? std::numeric_limits<uint32_t>::max() : 0;
4147 unsigned SFFOpc = IsWave32 ? AMDGPU::S_FF1_I32_B32 : AMDGPU::S_FF1_I32_B64;
4160 IsWave32 ? AMDGPU::S_BITSET0_B32 : AMDGPU::S_BITSET0_B64;
4173 unsigned CMPOpc = IsWave32 ? AMDGPU::S_CMP_LG_U32 : AMDGPU::S_CMP_LG_U64;
4182 MI.eraseFromParent();
4193 switch (
MI.getOpcode()) {
4194 case AMDGPU::WAVE_REDUCE_UMIN_PSEUDO_U32:
4196 case AMDGPU::WAVE_REDUCE_UMAX_PSEUDO_U32:
4198 case AMDGPU::S_UADDO_PSEUDO:
4199 case AMDGPU::S_USUBO_PSEUDO: {
4206 unsigned Opc = (
MI.getOpcode() == AMDGPU::S_UADDO_PSEUDO)
4208 : AMDGPU::S_SUB_I32;
4215 MI.eraseFromParent();
4218 case AMDGPU::S_ADD_U64_PSEUDO:
4219 case AMDGPU::S_SUB_U64_PSEUDO: {
4234 MI,
MRI, Src0, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
4236 MI,
MRI, Src0, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
4239 MI,
MRI, Src1, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
4241 MI,
MRI, Src1, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
4243 bool IsAdd = (
MI.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);
4245 unsigned LoOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
4246 unsigned HiOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
4254 MI.eraseFromParent();
4257 case AMDGPU::V_ADD_U64_PSEUDO:
4258 case AMDGPU::V_SUB_U64_PSEUDO: {
4264 bool IsAdd = (
MI.getOpcode() == AMDGPU::V_ADD_U64_PSEUDO);
4270 if (IsAdd && ST.hasLshlAddB64()) {
4276 TII->legalizeOperands(*
Add);
4277 MI.eraseFromParent();
4281 const auto *
CarryRC =
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
4291 : &AMDGPU::VReg_64RegClass;
4294 : &AMDGPU::VReg_64RegClass;
4297 TRI->getSubRegisterClass(
Src0RC, AMDGPU::sub0);
4299 TRI->getSubRegisterClass(
Src1RC, AMDGPU::sub1);
4311 unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
4318 unsigned HiOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
4332 TII->legalizeOperands(*LoHalf);
4333 TII->legalizeOperands(*HiHalf);
4334 MI.eraseFromParent();
4337 case AMDGPU::S_ADD_CO_PSEUDO:
4338 case AMDGPU::S_SUB_CO_PSEUDO: {
4352 unsigned Opc = (
MI.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
4353 ? AMDGPU::S_ADDC_U32
4354 : AMDGPU::S_SUBB_U32;
4379 if (ST.hasScalarCompareEq64()) {
4385 TRI->getSubRegisterClass(
Src2RC, AMDGPU::sub0);
4409 (
WaveSize == 64) ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
4415 MI.eraseFromParent();
4418 case AMDGPU::SI_INIT_M0: {
4420 TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
4421 .
add(
MI.getOperand(0));
4422 MI.eraseFromParent();
4425 case AMDGPU::GET_GROUPSTATICSIZE: {
4430 .
add(
MI.getOperand(0))
4432 MI.eraseFromParent();
4435 case AMDGPU::SI_INDIRECT_SRC_V1:
4436 case AMDGPU::SI_INDIRECT_SRC_V2:
4437 case AMDGPU::SI_INDIRECT_SRC_V4:
4438 case AMDGPU::SI_INDIRECT_SRC_V8:
4439 case AMDGPU::SI_INDIRECT_SRC_V9:
4440 case AMDGPU::SI_INDIRECT_SRC_V10:
4441 case AMDGPU::SI_INDIRECT_SRC_V11:
4442 case AMDGPU::SI_INDIRECT_SRC_V12:
4443 case AMDGPU::SI_INDIRECT_SRC_V16:
4444 case AMDGPU::SI_INDIRECT_SRC_V32:
4446 case AMDGPU::SI_INDIRECT_DST_V1:
4447 case AMDGPU::SI_INDIRECT_DST_V2:
4448 case AMDGPU::SI_INDIRECT_DST_V4:
4449 case AMDGPU::SI_INDIRECT_DST_V8:
4450 case AMDGPU::SI_INDIRECT_DST_V9:
4451 case AMDGPU::SI_INDIRECT_DST_V10:
4452 case AMDGPU::SI_INDIRECT_DST_V11:
4453 case AMDGPU::SI_INDIRECT_DST_V12:
4454 case AMDGPU::SI_INDIRECT_DST_V16:
4455 case AMDGPU::SI_INDIRECT_DST_V32:
4457 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
4458 case AMDGPU::SI_KILL_I1_PSEUDO:
4460 case AMDGPU::V_CNDMASK_B64_PSEUDO: {
4473 const auto *
CondRC =
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
4480 .
addReg(Src0, 0, AMDGPU::sub0)
4482 .
addReg(Src1, 0, AMDGPU::sub0)
4486 .
addReg(Src0, 0, AMDGPU::sub1)
4488 .
addReg(Src1, 0, AMDGPU::sub1)
4496 MI.eraseFromParent();
4499 case AMDGPU::SI_BR_UNDEF: {
4503 .
add(
MI.getOperand(0));
4505 MI.eraseFromParent();
4508 case AMDGPU::ADJCALLSTACKUP:
4509 case AMDGPU::ADJCALLSTACKDOWN: {
4516 case AMDGPU::SI_CALL_ISEL: {
4529 MI.eraseFromParent();
4532 case AMDGPU::V_ADD_CO_U32_e32:
4533 case AMDGPU::V_SUB_CO_U32_e32:
4534 case AMDGPU::V_SUBREV_CO_U32_e32: {
4537 unsigned Opc =
MI.getOpcode();
4540 if (
TII->pseudoToMCOpcode(Opc) == -1) {
4546 if (
TII->isVOP3(*
I)) {
4551 I.add(
MI.getOperand(1))
4552 .add(
MI.getOperand(2));
4556 TII->legalizeOperands(*
I);
4558 MI.eraseFromParent();
4561 case AMDGPU::V_ADDC_U32_e32:
4562 case AMDGPU::V_SUBB_U32_e32:
4563 case AMDGPU::V_SUBBREV_U32_e32:
4566 TII->legalizeOperands(
MI);
4568 case AMDGPU::DS_GWS_INIT:
4569 case AMDGPU::DS_GWS_SEMA_BR:
4570 case AMDGPU::DS_GWS_BARRIER:
4571 TII->enforceOperandRCAlignment(
MI, AMDGPU::OpName::data0);
4573 case AMDGPU::DS_GWS_SEMA_V:
4574 case AMDGPU::DS_GWS_SEMA_P:
4575 case AMDGPU::DS_GWS_SEMA_RELEASE_ALL:
4583 case AMDGPU::S_SETREG_B32: {
4622 if (Def && Def->isMoveImmediate() && Def->getOperand(1).isImm()) {
4623 unsigned ImmVal = Def->getOperand(1).getImm();
4637 MI.eraseFromParent();
4646 MI.setDesc(
TII->get(AMDGPU::S_SETREG_B32_mode));
4650 case AMDGPU::S_INVERSE_BALLOT_U32:
4651 case AMDGPU::S_INVERSE_BALLOT_U64: {
4656 const Register DstReg =
MI.getOperand(0).getReg();
4666 MI.eraseFromParent();
4669 case AMDGPU::ENDPGM_TRAP: {
4672 MI.setDesc(
TII->get(AMDGPU::S_ENDPGM));
4690 MI.eraseFromParent();
4699 switch (
Op.getValue(0).getSimpleValueType().SimpleTy) {
4734 return (VT == MVT::i16) ? MVT::i16 : MVT::i32;
4738 return (Ty.getScalarSizeInBits() <= 16 && Subtarget->
has16BitInsts())
4740 : Ty.changeElementSize(32);
4790 switch (Ty.getScalarSizeInBits()) {
4808 if (Ty.getScalarSizeInBits() == 16)
4810 if (Ty.getScalarSizeInBits() == 32)
4821 EVT VT =
N->getValueType(0);
4825 if (VT == MVT::f16) {
4841 unsigned Opc =
Op.getOpcode();
4842 EVT VT =
Op.getValueType();
4843 assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4f32 ||
4844 VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i16 ||
4845 VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
4864 unsigned Opc =
Op.getOpcode();
4865 EVT VT =
Op.getValueType();
4866 assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4f32 ||
4867 VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i16 ||
4868 VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
4888 unsigned Opc =
Op.getOpcode();
4889 EVT VT =
Op.getValueType();
4890 assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8i16 ||
4891 VT == MVT::v8f16 || VT == MVT::v4f32 || VT == MVT::v16i16 ||
4892 VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
4899 : std::pair(Op0, Op0);
4918 switch (
Op.getOpcode()) {
4924 assert((!Result.getNode() ||
4925 Result.getNode()->getNumValues() == 2) &&
4926 "Load should return a value and a chain");
4930 if (
Op.getValueType() == MVT::f64)
4931 return lowerFSQRTF64(
Op, DAG);
4935 return LowerTrig(
Op, DAG);
4944 return LowerGlobalAddress(MFI,
Op, DAG);
4951 return lowerINSERT_SUBVECTOR(
Op, DAG);
4953 return lowerINSERT_VECTOR_ELT(
Op, DAG);
4955 return lowerEXTRACT_VECTOR_ELT(
Op, DAG);
4957 return lowerVECTOR_SHUFFLE(
Op, DAG);
4959 return lowerSCALAR_TO_VECTOR(
Op, DAG);
4961 return lowerBUILD_VECTOR(
Op, DAG);
4964 return lowerFP_ROUND(
Op, DAG);
4969 if (
Op.getOperand(0)->getValueType(0) != MVT::f32)
4981 return DAG.
getNode(Opc,
DL,
Op.getNode()->getVTList(),
Op->getOperand(0));
4984 return lowerTRAP(
Op, DAG);
4986 return lowerDEBUGTRAP(
Op, DAG);
4994 return lowerFMINNUM_FMAXNUM(
Op, DAG);
4997 return lowerFLDEXP(
Op, DAG);
5024 return lowerXMULO(
Op, DAG);
5027 return lowerXMUL_LOHI(
Op, DAG);
5046 if ((
LoadVT.getVectorNumElements() % 2) == 1) {
5049 LoadVT.getVectorNumElements() + 1);
5064 if ((
LoadVT.getVectorNumElements() % 2) == 1)
5077SDValue SITargetLowering::adjustLoadValueType(
unsigned Opcode,
5091 LoadVT.getVectorNumElements());
5092 }
else if ((
LoadVT.getVectorNumElements() % 2) == 1) {
5096 LoadVT.getVectorNumElements() + 1);
5106 VTList, Ops, M->getMemoryVT(),
5107 M->getMemOperand());
5124 assert(M->getNumValues() == 2 || M->getNumValues() == 3);
5125 bool IsTFE = M->getNumValues() == 3;
5144 return handleByteShortBufferLoads(DAG,
LoadVT,
DL, Ops, M);
5147 return getMemIntrinsicNode(Opc,
DL, M->getVTList(), Ops,
IntVT,
5148 M->getMemOperand(), DAG);
5154 M->getMemOperand(), DAG);
5162 EVT VT =
N->getValueType(0);
5164 unsigned CondCode = CD->getZExtValue();
5197 EVT VT =
N->getValueType(0);
5200 unsigned CondCode = CD->getZExtValue();
5227 EVT VT =
N->getValueType(0);
5234 Src.getOperand(1), Src.getOperand(2));
5245 Exec = AMDGPU::EXEC_LO;
5247 Exec = AMDGPU::EXEC;
5265 switch (
N->getOpcode()) {
5279 case Intrinsic::amdgcn_make_buffer_rsrc:
5280 Results.push_back(lowerPointerAsRsrcIntrin(
N, DAG));
5282 case Intrinsic::amdgcn_cvt_pkrtz: {
5291 case Intrinsic::amdgcn_cvt_pknorm_i16:
5292 case Intrinsic::amdgcn_cvt_pknorm_u16:
5293 case Intrinsic::amdgcn_cvt_pk_i16:
5294 case Intrinsic::amdgcn_cvt_pk_u16: {
5300 if (IID == Intrinsic::amdgcn_cvt_pknorm_i16)
5302 else if (IID == Intrinsic::amdgcn_cvt_pknorm_u16)
5304 else if (IID == Intrinsic::amdgcn_cvt_pk_i16)
5309 EVT VT =
N->getValueType(0);
5325 for (
unsigned I = 0;
I < Res.getNumOperands();
I++) {
5326 Results.push_back(Res.getOperand(
I));
5330 Results.push_back(Res.getValue(1));
5339 EVT VT =
N->getValueType(0);
5345 if (
NewVT.bitsLT(MVT::i32)) {
5352 N->getOperand(0), LHS, RHS);
5360 if (
N->getValueType(0) != MVT::v2f16)
5373 if (
N->getValueType(0) != MVT::v2f16)
5398 if (
I.getUse().get() !=
Value)
5401 if (
I->getOpcode() == Opcode)
5407unsigned SITargetLowering::isCFIntrinsic(
const SDNode *
Intr)
const {
5410 case Intrinsic::amdgcn_if:
5412 case Intrinsic::amdgcn_else:
5414 case Intrinsic::amdgcn_loop:
5416 case Intrinsic::amdgcn_end_cf:
5462 SDNode *
Intr = BRCOND.getOperand(1).getNode();
5475 assert(BR &&
"brcond missing unconditional branch user");
5476 Target = BR->getOperand(1);
5489 (
SetCC->getConstantOperandVal(1) == 1 &&
5496 Ops.push_back(
BRCOND.getOperand(0));
5528 for (
unsigned i = 1, e =
Intr->getNumValues() - 1; i !=
e; ++i) {
5545 Intr->getOperand(0));
5552 MVT VT =
Op.getSimpleValueType();
5561 if (Info->isEntryFunction())
5579 return Op.getValueType().bitsLE(VT) ?
5586 assert(
Op.getValueType() == MVT::f16 &&
5587 "Do not know how to custom lower FP_ROUND for non-f16 type");
5591 if (
SrcVT != MVT::f64)
5607 EVT VT =
Op.getValueType();
5619 if (VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v16f16)
5626 EVT VT =
Op.getValueType();
5631 if (
ExpVT == MVT::i16)
5659 EVT VT =
Op.getValueType();
5668 if (
C.isPowerOf2()) {
5675 SL, VT,
Result, ShiftAmt),
5695 if (
Op->isDivergent()) {
5712 return lowerTrapEndpgm(
Op, DAG);
5717 return lowerTrapHsaQueuePtr(
Op, DAG);
5720 lowerTrapHsaQueuePtr(
Op, DAG);
5723SDValue SITargetLowering::lowerTrapEndpgm(
5731 const SDLoc &
DL,
Align Alignment, ImplicitParameter Param)
const {
5741SDValue SITargetLowering::lowerTrapHsaQueuePtr(
5751 loadImplicitKernelArgument(DAG, MVT::i64, SL,
Align(8),
QUEUE_PTR);
5755 Register UserSGPR = Info->getQueuePtrUserSGPR();
5757 if (UserSGPR == AMDGPU::NoRegister) {
5782SDValue SITargetLowering::lowerTrapHsa(
5803 "debugtrap handler not supported",
5819SDValue SITargetLowering::getSegmentAperture(
unsigned AS,
const SDLoc &
DL,
5823 ? AMDGPU::SRC_SHARED_BASE
5847 {SDValue(Mov, 0), DAG.getConstant(32, DL, MVT::i64)}));
5856 return loadImplicitKernelArgument(DAG, MVT::i32,
DL,
Align(4), Param);
5861 Register UserSGPR = Info->getQueuePtrUserSGPR();
5862 if (UserSGPR == AMDGPU::NoRegister) {
5869 DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
5897 return ConstVal->getSExtValue() !=
TM.getNullPointerValue(AddrSpace);
5911 unsigned SrcAS =
ASC->getSrcAddressSpace();
5918 unsigned DestAS =
ASC->getDestAddressSpace();
5961 Op.getValueType() == MVT::i64) {
5970 Src.getValueType() == MVT::i64)
6000 if (
EltVT.getScalarSizeInBits() == 16 &&
IdxVal % 2 == 0) {
6046 unsigned EltSize =
EltVT.getSizeInBits();
6052 if (NumElts == 4 && EltSize == 16 &&
KIdx) {
6063 unsigned Idx =
KIdx->getZExtValue();
6087 assert(VecSize <= 64 &&
"Expected target vector size to be <= 64 bits");
6137 if (VecSize == 128 || VecSize == 256) {
6142 if (VecSize == 128) {
6155 for (
unsigned P = 0;
P < 4; ++
P) {
6161 Parts[0], Parts[1]));
6163 Parts[2], Parts[3]));
6183 Src = DAG.
getBitcast(Src.getValueType().changeTypeToInteger(), Src);
6187 unsigned EltSize =
EltVT.getSizeInBits();
6208 return Mask[
Elt + 1] == Mask[
Elt] + 1 && (Mask[
Elt] % 2 == 0);
6219 int SrcNumElts =
Op.getOperand(0).getValueType().getVectorNumElements();
6232 for (
int I = 0,
N =
ResultVT.getVectorNumElements();
I !=
N;
I += 2) {
6234 const int Idx =
SVN->getMaskElt(
I);
6242 const int Idx0 =
SVN->getMaskElt(
I);
6243 const int Idx1 =
SVN->getMaskElt(
I + 1);
6273 for (
int I = 1,
E =
ResultVT.getVectorNumElements();
I <
E; ++
I)
6274 VElts.push_back(UndefVal);
6282 EVT VT =
Op.getValueType();
6284 if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6285 VT == MVT::v8i16 || VT == MVT::v8f16) {
6308 if (VT == MVT::v16i16 || VT == MVT::v16f16) {
6315 for (
unsigned P = 0;
P < 4; ++
P)
6316 Parts[
P].push_back(
Op.getOperand(
I +
P *
E));
6319 for (
unsigned P = 0;
P < 4; ++
P) {
6329 assert(VT == MVT::v2f16 || VT == MVT::v2i16);
6436 assert(
PtrVT == MVT::i32 &&
"32-bit pointer is expected.");
6495 SDValue Param = lowerKernargMemParameter(
6505 "non-hsa intrinsic with hsa target",
6514 "intrinsic not supported on subtarget",
6524 unsigned NumElts =
Elts.size();
6526 if (NumElts <= 12) {
6535 for (
unsigned i = 0; i <
Elts.size(); ++i) {
6537 if (
Elt.getValueType() != MVT::f32)
6541 for (
unsigned i =
Elts.size(); i < NumElts; ++i)
6555 if (
SrcVT.isVector())
6558 Elts.push_back(Src);
6562 Elts.push_back(Undef);
6614 if (!
Data.getValueType().isInteger())
6616 Data.getValueType().changeTypeToInteger(),
Data);
6620 if ((
ReqRetVT.getVectorNumElements() % 2) == 1 &&
6621 ReqRetVT.getVectorElementType().getSizeInBits() == 16) {
6624 ReqRetVT.getVectorNumElements() + 1);
6637 if (Result->getNumValues() == 1)
6664 unsigned DimIdx,
unsigned EndIdx,
6665 unsigned NumGradients) {
6667 for (
unsigned I =
DimIdx;
I < EndIdx;
I++) {
6675 if (((
I + 1) >= EndIdx) ||
6676 ((NumGradients / 2) % 2 == 1 && (
I ==
DimIdx + (NumGradients / 2) - 1 ||
6677 I ==
DimIdx + NumGradients - 1))) {
6678 if (
Addr.getValueType() != MVT::i16)
6718 if (BaseOpcode->Atomic) {
6721 bool Is64Bit =
VData.getValueType() == MVT::i64;
6722 if (BaseOpcode->AtomicX2) {
6729 ResultTypes[0] = Is64Bit ? MVT::v2i64 : MVT::v2i32;
6730 DMask = Is64Bit ? 0xf : 0x3;
6733 DMask = Is64Bit ? 0x3 : 0x1;
6742 if (BaseOpcode->Store) {
6746 if (
StoreVT.getScalarType() == MVT::f16) {
6759 if (
LoadVT.getScalarType() == MVT::f16) {
6800 for (
unsigned I =
Intr->VAddrStart;
I <
Intr->GradientStart;
I++) {
6802 assert(
I ==
Intr->BiasIndex &&
"Got unexpected 16-bit extra argument");
6811 "Bias needs to be converted to 16 bit in A16 mode");
6816 if (BaseOpcode->Gradients && !
ST->hasG16() && (
IsA16 !=
IsG16)) {
6820 dbgs() <<
"Failed to lower image intrinsic: 16 bit addresses "
6821 "require 16 bit args for both gradients and addresses");
6826 if (!
ST->hasA16()) {
6827 LLVM_DEBUG(
dbgs() <<
"Failed to lower image intrinsic: Target does not "
6828 "support 16 bit addresses\n");
6838 if (BaseOpcode->Gradients &&
IsG16 &&
ST->hasG16()) {
6884 const bool HasPartialNSAEncoding =
ST->hasPartialNSAEncoding();
6885 const bool UseNSA =
ST->hasNSAEncoding() &&
6886 VAddrs.size() >=
ST->getNSAThreshold(MF) &&
6903 if (!BaseOpcode->Sampler) {
6958 if (BaseOpcode->Atomic)
6964 if (BaseOpcode->Store || BaseOpcode->Atomic)
6965 Ops.push_back(
VData);
6968 Ops.push_back(VAddr);
6973 Ops.push_back(VAddr);
6975 if (BaseOpcode->Sampler)
6980 Ops.push_back(
Unorm);
6982 Ops.push_back(
IsA16 &&
6983 ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
6985 Ops.push_back(
IsA16 ? True : False);
6993 Ops.push_back(
DimInfo->DA ? True : False);
6994 if (BaseOpcode->HasD16)
6995 Ops.push_back(
IsD16 ? True : False);
6997 Ops.push_back(
Op.getOperand(0));
7005 UseNSA ? AMDGPU::MIMGEncGfx11NSA
7010 UseNSA ? AMDGPU::MIMGEncGfx10NSA
7019 "requested image instruction is not supported on this GPU");
7038 if (BaseOpcode->AtomicX2) {
7043 if (BaseOpcode->Store)
7066 if (!
Offset->isDivergent()) {
7092 unsigned NumLoads = 1;
7094 unsigned NumElts =
LoadVT.isVector() ?
LoadVT.getVectorNumElements() : 1;
7096 LoadVT.getScalarType() == MVT::f32));
7098 if (NumElts == 8 || NumElts == 16) {
7099 NumLoads = NumElts / 4;
7117 setBufferOffsets(
Offset, DAG, &Ops[3],
7118 NumLoads > 1 ?
Align(16 * NumLoads) :
Align(4));
7121 for (
unsigned i = 0; i < NumLoads; ++i) {
7127 if (NumElts == 8 || NumElts == 16)
7163 EVT VT =
Op.getValueType();
7169 switch (IntrinsicID) {
7170 case Intrinsic::amdgcn_implicit_buffer_ptr: {
7173 return getPreloadedValue(DAG, *MFI, VT,
7176 case Intrinsic::amdgcn_dispatch_ptr:
7177 case Intrinsic::amdgcn_queue_ptr: {
7180 MF.
getFunction(),
"unsupported hsa intrinsic without hsa target",
7186 auto RegID = IntrinsicID == Intrinsic::amdgcn_dispatch_ptr ?
7188 return getPreloadedValue(DAG, *MFI, VT, RegID);
7190 case Intrinsic::amdgcn_implicitarg_ptr: {
7192 return getImplicitArgPtr(DAG,
DL);
7193 return getPreloadedValue(DAG, *MFI, VT,
7196 case Intrinsic::amdgcn_kernarg_segment_ptr: {
7202 return getPreloadedValue(DAG, *MFI, VT,
7205 case Intrinsic::amdgcn_dispatch_id: {
7208 case Intrinsic::amdgcn_rcp:
7210 case Intrinsic::amdgcn_rsq:
7212 case Intrinsic::amdgcn_rsq_legacy:
7216 case Intrinsic::amdgcn_rcp_legacy:
7220 case Intrinsic::amdgcn_rsq_clamp: {
7234 case Intrinsic::r600_read_ngroups_x:
7238 return lowerKernargMemParameter(DAG, VT, VT,
DL, DAG.
getEntryNode(),
7241 case Intrinsic::r600_read_ngroups_y:
7245 return lowerKernargMemParameter(DAG, VT, VT,
DL, DAG.
getEntryNode(),
7248 case Intrinsic::r600_read_ngroups_z:
7252 return lowerKernargMemParameter(DAG, VT, VT,
DL, DAG.
getEntryNode(),
7255 case Intrinsic::r600_read_global_size_x:
7259 return lowerKernargMemParameter(DAG, VT, VT,
DL, DAG.
getEntryNode(),
7262 case Intrinsic::r600_read_global_size_y:
7266 return lowerKernargMemParameter(DAG, VT, VT,
DL, DAG.
getEntryNode(),
7269 case Intrinsic::r600_read_global_size_z:
7273 return lowerKernargMemParameter(DAG, VT, VT,
DL, DAG.
getEntryNode(),
7276 case Intrinsic::r600_read_local_size_x:
7280 return lowerImplicitZextParam(DAG,
Op, MVT::i16,
7282 case Intrinsic::r600_read_local_size_y:
7286 return lowerImplicitZextParam(DAG,
Op, MVT::i16,
7288 case Intrinsic::r600_read_local_size_z:
7292 return lowerImplicitZextParam(DAG,
Op, MVT::i16,
7294 case Intrinsic::amdgcn_workgroup_id_x:
7295 return getPreloadedValue(DAG, *MFI, VT,
7297 case Intrinsic::amdgcn_workgroup_id_y:
7298 return getPreloadedValue(DAG, *MFI, VT,
7300 case Intrinsic::amdgcn_workgroup_id_z:
7301 return getPreloadedValue(DAG, *MFI, VT,
7303 case Intrinsic::amdgcn_lds_kernel_id: {
7305 return getLDSKernelId(DAG,
DL);
7306 return getPreloadedValue(DAG, *MFI, VT,
7309 case Intrinsic::amdgcn_workitem_id_x:
7310 return lowerWorkitemID(DAG,
Op, 0, MFI->getArgInfo().WorkItemIDX);
7311 case Intrinsic::amdgcn_workitem_id_y:
7312 return lowerWorkitemID(DAG,
Op, 1, MFI->getArgInfo().WorkItemIDY);
7313 case Intrinsic::amdgcn_workitem_id_z:
7314 return lowerWorkitemID(DAG,
Op, 2, MFI->getArgInfo().WorkItemIDZ);
7315 case Intrinsic::amdgcn_wavefrontsize:
7318 case Intrinsic::amdgcn_s_buffer_load: {
7322 return lowerSBuffer(VT,
DL,
Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3),
7325 case Intrinsic::amdgcn_fdiv_fast:
7326 return lowerFDIV_FAST(
Op, DAG);
7327 case Intrinsic::amdgcn_sin:
7330 case Intrinsic::amdgcn_cos:
7333 case Intrinsic::amdgcn_mul_u24:
7335 case Intrinsic::amdgcn_mul_i24:
7338 case Intrinsic::amdgcn_log_clamp: {
7344 case Intrinsic::amdgcn_ldexp:
7347 case Intrinsic::amdgcn_fract:
7350 case Intrinsic::amdgcn_class:
7352 Op.getOperand(1),
Op.getOperand(2));
7353 case Intrinsic::amdgcn_div_fmas:
7355 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3),
7358 case Intrinsic::amdgcn_div_fixup:
7360 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
7362 case Intrinsic::amdgcn_div_scale: {
7375 SDValue Src0 = Param->isAllOnes() ? Numerator : Denominator;
7378 Denominator, Numerator);
7380 case Intrinsic::amdgcn_icmp: {
7382 if (
Op.getOperand(1).getValueType() == MVT::i1 &&
7383 Op.getConstantOperandVal(2) == 0 &&
7388 case Intrinsic::amdgcn_fcmp: {
7391 case Intrinsic::amdgcn_ballot:
7393 case Intrinsic::amdgcn_fmed3:
7395 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
7396 case Intrinsic::amdgcn_fdot2:
7398 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3),
7400 case Intrinsic::amdgcn_fmul_legacy:
7402 Op.getOperand(1),
Op.getOperand(2));
7403 case Intrinsic::amdgcn_sffbh:
7405 case Intrinsic::amdgcn_sbfe:
7407 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
7408 case Intrinsic::amdgcn_ubfe:
7410 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
7411 case Intrinsic::amdgcn_cvt_pkrtz:
7412 case Intrinsic::amdgcn_cvt_pknorm_i16:
7413 case Intrinsic::amdgcn_cvt_pknorm_u16:
7414 case Intrinsic::amdgcn_cvt_pk_i16:
7415 case Intrinsic::amdgcn_cvt_pk_u16: {
7417 EVT VT =
Op.getValueType();
7420 if (IntrinsicID == Intrinsic::amdgcn_cvt_pkrtz)
7422 else if (IntrinsicID == Intrinsic::amdgcn_cvt_pknorm_i16)
7424 else if (IntrinsicID == Intrinsic::amdgcn_cvt_pknorm_u16)
7426 else if (IntrinsicID == Intrinsic::amdgcn_cvt_pk_i16)
7432 return DAG.
getNode(Opcode,
DL, VT,
Op.getOperand(1),
Op.getOperand(2));
7435 Op.getOperand(1),
Op.getOperand(2));
7438 case Intrinsic::amdgcn_fmad_ftz:
7440 Op.getOperand(2),
Op.getOperand(3));
7442 case Intrinsic::amdgcn_if_break:
7444 Op->getOperand(1),
Op->getOperand(2)), 0);
7446 case Intrinsic::amdgcn_groupstaticsize: {
7458 case Intrinsic::amdgcn_is_shared:
7459 case Intrinsic::amdgcn_is_private: {
7461 unsigned AS = (IntrinsicID == Intrinsic::amdgcn_is_shared) ?
7471 case Intrinsic::amdgcn_perm:
7473 Op.getOperand(2),
Op.getOperand(3));
7474 case Intrinsic::amdgcn_reloc_constant: {
7499 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(3), DAG);
7500 auto Offsets = splitBufferOffsets(
Op.getOperand(4), DAG);
7517 M->getMemOperand());
7532 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(3), DAG);
7533 auto Offsets = splitBufferOffsets(
Op.getOperand(5), DAG);
7550 M->getMemOperand());
7559 case Intrinsic::amdgcn_ds_ordered_add:
7560 case Intrinsic::amdgcn_ds_ordered_swap: {
7566 unsigned WaveRelease = M->getConstantOperandVal(8);
7567 unsigned WaveDone = M->getConstantOperandVal(9);
7579 "ds_ordered_count: dword count must be between 1 and 4");
7589 unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
7610 M->getVTList(), Ops, M->getMemoryVT(),
7611 M->getMemOperand());
7613 case Intrinsic::amdgcn_ds_fadd: {
7617 case Intrinsic::amdgcn_ds_fadd:
7623 M->getOperand(0), M->getOperand(2), M->getOperand(3),
7624 M->getMemOperand());
7626 case Intrinsic::amdgcn_ds_fmin:
7627 case Intrinsic::amdgcn_ds_fmax: {
7631 case Intrinsic::amdgcn_ds_fmin:
7634 case Intrinsic::amdgcn_ds_fmax:
7647 M->getMemoryVT(), M->getMemOperand());
7649 case Intrinsic::amdgcn_buffer_load:
7650 case Intrinsic::amdgcn_buffer_load_format: {
7664 setBufferOffsets(
Op.getOperand(4), DAG, &Ops[3]);
7666 unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ?
7669 EVT VT =
Op.getValueType();
7674 if (
LoadVT.getScalarType() == MVT::f16)
7679 if (
LoadVT.getScalarType() == MVT::i8 ||
7680 LoadVT.getScalarType() == MVT::i16)
7681 return handleByteShortBufferLoads(DAG,
LoadVT,
DL, Ops, M);
7683 return getMemIntrinsicNode(Opc,
DL,
Op->getVTList(), Ops,
IntVT,
7684 M->getMemOperand(), DAG);
7686 case Intrinsic::amdgcn_raw_buffer_load:
7687 case Intrinsic::amdgcn_raw_ptr_buffer_load:
7688 case Intrinsic::amdgcn_raw_buffer_load_format:
7689 case Intrinsic::amdgcn_raw_ptr_buffer_load_format: {
7691 IntrID == Intrinsic::amdgcn_raw_buffer_load_format ||
7692 IntrID == Intrinsic::amdgcn_raw_ptr_buffer_load_format;
7694 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(2), DAG);
7695 auto Offsets = splitBufferOffsets(
Op.getOperand(3), DAG);
7708 return lowerIntrinsicLoad(M,
IsFormat, DAG, Ops);
7710 case Intrinsic::amdgcn_struct_buffer_load:
7711 case Intrinsic::amdgcn_struct_ptr_buffer_load:
7712 case Intrinsic::amdgcn_struct_buffer_load_format:
7713 case Intrinsic::amdgcn_struct_ptr_buffer_load_format: {
7715 IntrID == Intrinsic::amdgcn_struct_buffer_load_format ||
7716 IntrID == Intrinsic::amdgcn_struct_ptr_buffer_load_format;
7718 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(2), DAG);
7719 auto Offsets = splitBufferOffsets(
Op.getOperand(4), DAG);
7733 case Intrinsic::amdgcn_tbuffer_load: {
7754 if (
LoadVT.getScalarType() == MVT::f16)
7758 Op->getVTList(), Ops,
LoadVT, M->getMemOperand(),
7761 case Intrinsic::amdgcn_raw_tbuffer_load:
7762 case Intrinsic::amdgcn_raw_ptr_tbuffer_load: {
7765 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(2), DAG);
7766 auto Offsets = splitBufferOffsets(
Op.getOperand(3), DAG);
7780 if (
LoadVT.getScalarType() == MVT::f16)
7784 Op->getVTList(), Ops,
LoadVT, M->getMemOperand(),
7787 case Intrinsic::amdgcn_struct_tbuffer_load:
7788 case Intrinsic::amdgcn_struct_ptr_tbuffer_load: {
7791 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(2), DAG);
7792 auto Offsets = splitBufferOffsets(
Op.getOperand(4), DAG);
7806 if (
LoadVT.getScalarType() == MVT::f16)
7810 Op->getVTList(), Ops,
LoadVT, M->getMemOperand(),
7813 case Intrinsic::amdgcn_buffer_atomic_swap:
7814 case Intrinsic::amdgcn_buffer_atomic_add:
7815 case Intrinsic::amdgcn_buffer_atomic_sub:
7816 case Intrinsic::amdgcn_buffer_atomic_csub:
7817 case Intrinsic::amdgcn_buffer_atomic_smin:
7818 case Intrinsic::amdgcn_buffer_atomic_umin:
7819 case Intrinsic::amdgcn_buffer_atomic_smax:
7820 case Intrinsic::amdgcn_buffer_atomic_umax:
7821 case Intrinsic::amdgcn_buffer_atomic_and:
7822 case Intrinsic::amdgcn_buffer_atomic_or:
7823 case Intrinsic::amdgcn_buffer_atomic_xor:
7824 case Intrinsic::amdgcn_buffer_atomic_fadd: {
7838 setBufferOffsets(
Op.getOperand(5), DAG, &Ops[4]);
7840 EVT VT =
Op.getValueType();
7843 unsigned Opcode = 0;
7846 case Intrinsic::amdgcn_buffer_atomic_swap:
7849 case Intrinsic::amdgcn_buffer_atomic_add:
7852 case Intrinsic::amdgcn_buffer_atomic_sub:
7855 case Intrinsic::amdgcn_buffer_atomic_csub:
7858 case Intrinsic::amdgcn_buffer_atomic_smin:
7861 case Intrinsic::amdgcn_buffer_atomic_umin:
7864 case Intrinsic::amdgcn_buffer_atomic_smax:
7867 case Intrinsic::amdgcn_buffer_atomic_umax:
7870 case Intrinsic::amdgcn_buffer_atomic_and:
7873 case Intrinsic::amdgcn_buffer_atomic_or:
7876 case Intrinsic::amdgcn_buffer_atomic_xor:
7879 case Intrinsic::amdgcn_buffer_atomic_fadd:
7887 M->getMemOperand());
7889 case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
7890 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd:
7892 case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
7893 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd:
7895 case Intrinsic::amdgcn_raw_buffer_atomic_fmin:
7896 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin:
7898 case Intrinsic::amdgcn_struct_buffer_atomic_fmin:
7899 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fmin:
7901 case Intrinsic::amdgcn_raw_buffer_atomic_fmax:
7902 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax:
7904 case Intrinsic::amdgcn_struct_buffer_atomic_fmax:
7905 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fmax:
7907 case Intrinsic::amdgcn_raw_buffer_atomic_swap:
7908 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
7910 case Intrinsic::amdgcn_raw_buffer_atomic_add:
7911 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_add:
7913 case Intrinsic::amdgcn_raw_buffer_atomic_sub:
7914 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_sub:
7916 case Intrinsic::amdgcn_raw_buffer_atomic_smin:
7917 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_smin:
7919 case Intrinsic::amdgcn_raw_buffer_atomic_umin:
7920 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_umin:
7922 case Intrinsic::amdgcn_raw_buffer_atomic_smax:
7923 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_smax:
7925 case Intrinsic::amdgcn_raw_buffer_atomic_umax:
7926 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_umax:
7928 case Intrinsic::amdgcn_raw_buffer_atomic_and:
7929 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_and:
7931 case Intrinsic::amdgcn_raw_buffer_atomic_or:
7932 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_or:
7934 case Intrinsic::amdgcn_raw_buffer_atomic_xor:
7935 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_xor:
7937 case Intrinsic::amdgcn_raw_buffer_atomic_inc:
7938 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_inc:
7940 case Intrinsic::amdgcn_raw_buffer_atomic_dec:
7941 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_dec:
7943 case Intrinsic::amdgcn_struct_buffer_atomic_swap:
7944 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_swap:
7945 return lowerStructBufferAtomicIntrin(
Op, DAG,
7947 case Intrinsic::amdgcn_struct_buffer_atomic_add:
7948 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_add:
7950 case Intrinsic::amdgcn_struct_buffer_atomic_sub:
7951 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_sub:
7953 case Intrinsic::amdgcn_struct_buffer_atomic_smin:
7954 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_smin:
7955 return lowerStructBufferAtomicIntrin(
Op, DAG,
7957 case Intrinsic::amdgcn_struct_buffer_atomic_umin:
7958 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_umin:
7959 return lowerStructBufferAtomicIntrin(
Op, DAG,
7961 case Intrinsic::amdgcn_struct_buffer_atomic_smax:
7962 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_smax:
7963 return lowerStructBufferAtomicIntrin(
Op, DAG,
7965 case Intrinsic::amdgcn_struct_buffer_atomic_umax:
7966 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_umax:
7967 return lowerStructBufferAtomicIntrin(
Op, DAG,
7969 case Intrinsic::amdgcn_struct_buffer_atomic_and:
7970 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_and:
7972 case Intrinsic::amdgcn_struct_buffer_atomic_or:
7973 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_or:
7975 case Intrinsic::amdgcn_struct_buffer_atomic_xor:
7976 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_xor:
7978 case Intrinsic::amdgcn_struct_buffer_atomic_inc:
7979 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_inc:
7981 case Intrinsic::amdgcn_struct_buffer_atomic_dec:
7982 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_dec:
7985 case Intrinsic::amdgcn_buffer_atomic_cmpswap: {
8000 setBufferOffsets(
Op.getOperand(6), DAG, &Ops[5]);
8002 EVT VT =
Op.getValueType();
8006 Op->getVTList(), Ops, VT, M->getMemOperand());
8008 case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap:
8009 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap: {
8010 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(4), DAG);
8011 auto Offsets = splitBufferOffsets(
Op.getOperand(5), DAG);
8024 EVT VT =
Op.getValueType();
8028 Op->getVTList(), Ops, VT, M->getMemOperand());
8030 case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap:
8031 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_cmpswap: {
8032 SDValue Rsrc = bufferRsrcPtrToVector(
Op->getOperand(4), DAG);
8033 auto Offsets = splitBufferOffsets(
Op.getOperand(6), DAG);
8046 EVT VT =
Op.getValueType();
8050 Op->getVTList(), Ops, VT, M->getMemOperand());
8052 case Intrinsic::amdgcn_image_bvh_intersect_ray: {
8061 assert(NodePtr.getValueType() == MVT::i32 ||
8062 NodePtr.getValueType() == MVT::i64);
8064 RayDir.getValueType() == MVT::v3f32);
8072 const bool IsA16 =
RayDir.getValueType().getVectorElementType() == MVT::f16;
8073 const bool Is64 = NodePtr.getValueType() == MVT::i64;
8080 {AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16},
8081 {AMDGPU::IMAGE_BVH64_INTERSECT_RAY,
8082 AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16}};
8103 if (Lanes[0].getValueSizeInBits() == 32) {
8104 for (
unsigned I = 0;
I < 3; ++
I)
8105 Ops.push_back(DAG.
getBitcast(MVT::i32, Lanes[
I]));
8111 { Lanes[0], Lanes[1] })));
8112 Ops.push_back(Lanes[2]);
8118 { Elt0, Lanes[0] })));
8122 { Lanes[1], Lanes[2] })));
8128 Ops.push_back(NodePtr);
8135 for (
unsigned I = 0;
I < 3; ++
I) {
8138 {DirLanes[I], InvDirLanes[I]})));
8150 Ops.push_back(NodePtr);
8162 Ops.append(16 - Ops.size(), Undef);
8164 assert(Ops.size() >= 8 && Ops.size() <= 12);
8173 Ops.push_back(M->getChain());
8180 case Intrinsic::amdgcn_global_atomic_fmin:
8181 case Intrinsic::amdgcn_global_atomic_fmax:
8182 case Intrinsic::amdgcn_flat_atomic_fmin:
8183 case Intrinsic::amdgcn_flat_atomic_fmax: {
8190 unsigned Opcode = 0;
8192 case Intrinsic::amdgcn_global_atomic_fmin:
8193 case Intrinsic::amdgcn_flat_atomic_fmin: {
8197 case Intrinsic::amdgcn_global_atomic_fmax:
8198 case Intrinsic::amdgcn_flat_atomic_fmax: {
8206 M->getVTList(), Ops, M->getMemoryVT(),
8207 M->getMemOperand());
8221SDValue SITargetLowering::getMemIntrinsicNode(
unsigned Opcode,
const SDLoc &
DL,
8255 (VT == MVT::v3i32 || VT == MVT::v3f32)) {
8279 unsigned NumElements =
StoreVT.getVectorNumElements();
8306 for (
unsigned I = 0;
I <
Elts.size() / 2;
I += 1) {
8312 if ((NumElements % 2) == 1) {
8314 unsigned I =
Elts.size() / 2;
8330 if (NumElements == 3) {
8354 switch (IntrinsicID) {
8355 case Intrinsic::amdgcn_exp_compr: {
8359 "intrinsic not supported on subtarget",
DL.getDebugLoc());
8382 unsigned Opc =
Done->isZero() ? AMDGPU::EXP : AMDGPU::EXP_DONE;
8385 case Intrinsic::amdgcn_s_barrier: {
8389 if (
WGSize <=
ST.getWavefrontSize())
8391 Op.getOperand(0)), 0);
8395 case Intrinsic::amdgcn_tbuffer_store: {
8397 bool IsD16 = (
VData.getValueType().getScalarType() == MVT::f16);
8421 M->getMemoryVT(), M->getMemOperand());
8424 case Intrinsic::amdgcn_struct_tbuffer_store:
8425 case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
8427 bool IsD16 = (
VData.getValueType().getScalarType() == MVT::f16);
8430 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(3), DAG);
8431 auto Offsets = splitBufferOffsets(
Op.getOperand(5), DAG);
8448 M->getMemoryVT(), M->getMemOperand());
8451 case Intrinsic::amdgcn_raw_tbuffer_store:
8452 case Intrinsic::amdgcn_raw_ptr_tbuffer_store: {
8454 bool IsD16 = (
VData.getValueType().getScalarType() == MVT::f16);
8457 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(3), DAG);
8458 auto Offsets = splitBufferOffsets(
Op.getOperand(4), DAG);
8475 M->getMemoryVT(), M->getMemOperand());
8478 case Intrinsic::amdgcn_buffer_store:
8479 case Intrinsic::amdgcn_buffer_store_format: {
8481 bool IsD16 = (
VData.getValueType().getScalarType() == MVT::f16);
8498 setBufferOffsets(
Op.getOperand(5), DAG, &Ops[4]);
8500 unsigned Opc = IntrinsicID == Intrinsic::amdgcn_buffer_store ?
8508 return handleByteShortBufferStores(DAG,
VDataType,
DL, Ops, M);
8511 M->getMemoryVT(), M->getMemOperand());
8514 case Intrinsic::amdgcn_raw_buffer_store:
8515 case Intrinsic::amdgcn_raw_ptr_buffer_store:
8516 case Intrinsic::amdgcn_raw_buffer_store_format:
8517 case Intrinsic::amdgcn_raw_ptr_buffer_store_format: {
8519 IntrinsicID == Intrinsic::amdgcn_raw_buffer_store_format ||
8520 IntrinsicID == Intrinsic::amdgcn_raw_ptr_buffer_store_format;
8537 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(3), DAG);
8538 auto Offsets = splitBufferOffsets(
Op.getOperand(4), DAG);
8557 return handleByteShortBufferStores(DAG,
VDataVT,
DL, Ops, M);
8560 M->getMemoryVT(), M->getMemOperand());
8563 case Intrinsic::amdgcn_struct_buffer_store:
8564 case Intrinsic::amdgcn_struct_ptr_buffer_store:
8565 case Intrinsic::amdgcn_struct_buffer_store_format:
8566 case Intrinsic::amdgcn_struct_ptr_buffer_store_format: {
8568 IntrinsicID == Intrinsic::amdgcn_struct_buffer_store_format ||
8569 IntrinsicID == Intrinsic::amdgcn_struct_ptr_buffer_store_format;
8587 auto Rsrc = bufferRsrcPtrToVector(
Op.getOperand(3), DAG);
8588 auto Offsets = splitBufferOffsets(
Op.getOperand(5), DAG);
8608 return handleByteShortBufferStores(DAG,
VDataType,
DL, Ops, M);
8611 M->getMemoryVT(), M->getMemOperand());
8613 case Intrinsic::amdgcn_raw_buffer_load_lds:
8614 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
8615 case Intrinsic::amdgcn_struct_buffer_load_lds:
8616 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
8619 IntrinsicID == Intrinsic::amdgcn_struct_buffer_load_lds ||
8620 IntrinsicID == Intrinsic::amdgcn_struct_ptr_buffer_load_lds;
8625 unsigned Size =
Op->getConstantOperandVal(4);
8632 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
8633 :
HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
8634 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
8638 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
8639 :
HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
8640 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
8644 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
8645 :
HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
8646 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
8659 Ops.push_back(
Op.getOperand(5));
8663 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(2), DAG);
8664 Ops.push_back(
Rsrc);
8665 Ops.push_back(
Op.getOperand(6 + OpOffset));
8666 Ops.push_back(
Op.getOperand(7 + OpOffset));
8667 unsigned Aux =
Op.getConstantOperandVal(8 + OpOffset);
8672 Ops.push_back(
M0Val.getValue(0));
8673 Ops.push_back(
M0Val.getValue(1));
8692 sizeof(int32_t),
LoadMMO->getBaseAlign());
8699 case Intrinsic::amdgcn_global_load_lds: {
8701 unsigned Size =
Op->getConstantOperandVal(4);
8706 Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
8709 Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
8712 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
8729 if (LHS->isDivergent())
8733 RHS.getOperand(0).getValueType() == MVT::i32) {
8740 Ops.push_back(
Addr);
8741 if (!
Addr->isDivergent()) {
8750 Ops.push_back(
Op.getOperand(5));
8751 Ops.push_back(
Op.getOperand(6));
8752 Ops.push_back(
M0Val.getValue(0));
8753 Ops.push_back(
M0Val.getValue(1));
8757 LoadPtrI.Offset =
Op->getConstantOperandVal(5);
8767 sizeof(int32_t),
Align(4));
8774 case Intrinsic::amdgcn_end_cf:
8776 Op->getOperand(2), Chain), 0);
8794std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
8847 Align Alignment)
const {
8853 if (
TII->splitMUBUFOffset(Imm, SOffset,
ImmOffset, Alignment)) {
8903 std::optional<uint32_t>
ConstStride = std::nullopt;
8933 unsigned Opc = (
LoadVT.getScalarType() == MVT::i8) ?
8939 M->getMemOperand());
8960 M->getMemOperand());
8985 if (
Ld->getAlign() <
Align(4) ||
Ld->isDivergent())
8989 unsigned AS =
Ld->getAddressSpace();
8999 if ((
MemVT.isSimple() && !
DCI.isAfterLegalizeDAG()) ||
9000 MemVT.getSizeInBits() >= 32)
9006 "unexpected vector extload");
9012 Ld->getOffset(),
Ld->getPointerInfo(), MVT::i32,
Ld->getAlign(),
9013 Ld->getMemOperand()->getFlags(),
Ld->getAAInfo(),
9017 if (
MemVT.isFloatingPoint()) {
9019 "unexpected fp extload");
9034 EVT VT =
Ld->getValueType(0);
9037 DCI.AddToWorklist(
Cvt.getNode());
9042 DCI.AddToWorklist(
Cvt.getNode());
9053 if (Info.isEntryFunction())
9054 return Info.hasFlatScratchInit();
9080 if (!
MemVT.isVector()) {
9090 for (
unsigned I = 0,
N =
MemVT.getVectorNumElements();
I !=
N; ++
I) {
9105 if (!
MemVT.isVector())
9108 assert(
Op.getValueType().getVectorElementType() == MVT::i32 &&
9109 "Custom lowering for non-i32 vectors hasn't been implemented.");
9112 unsigned AS =
Load->getAddressSpace();
9114 Alignment.
value() <
MemVT.getStoreSize() &&
MemVT.getSizeInBits() > 32) {
9127 unsigned NumElements =
MemVT.getVectorNumElements();
9131 if (!
Op->isDivergent() && Alignment >=
Align(4) && NumElements < 32) {
9132 if (
MemVT.isPow2VectorType())
9147 Alignment >=
Align(4) && NumElements < 32) {
9148 if (
MemVT.isPow2VectorType())
9161 if (NumElements > 4)
9181 if (NumElements > 2)
9186 if (NumElements > 4)
9198 auto Flags =
Load->getMemOperand()->getFlags();
9204 if (
MemVT.isVector())
9219 EVT VT =
Op.getValueType();
9255 EVT VT =
Op.getValueType();
9268 if (
CLHS->isExactlyValue(1.0)) {
9290 if (
CLHS->isExactlyValue(-1.0)) {
9313 EVT VT =
Op.getValueType();
9339 return DAG.
getNode(Opcode, SL, VT,
A,
B, Flags);
9352 return DAG.
getNode(Opcode, SL, VTList,
9361 return DAG.
getNode(Opcode, SL, VT, {
A,
B,
C}, Flags);
9374 return DAG.
getNode(Opcode, SL, VTList,
9438 assert(ST->hasDenormModeInst() &&
"Requires S_DENORM_MODE");
9453 Flags.setNoFPExcept(
true);
9485 if (!HasFP32Denormals) {
9534 if (!HasFP32Denormals) {
9551 AMDGPU::S_SETREG_B32, SL, MVT::Other,
9635 EVT VT =
Op.getValueType();
9638 return LowerFDIV32(
Op, DAG);
9641 return LowerFDIV64(
Op, DAG);
9644 return LowerFDIV16(
Op, DAG);
9684 if (VT == MVT::i1) {
9687 Store->getBasePtr(), MVT::i1,
Store->getMemOperand());
9691 Store->getValue().getValueType().getScalarType() == MVT::i32);
9693 unsigned AS =
Store->getAddressSpace();
9712 if (NumElements > 4)
9719 VT, *
Store->getMemOperand()))
9728 if (NumElements > 2)
9732 if (NumElements > 4 ||
9741 auto Flags =
Store->getMemOperand()->getFlags();
9840 EVT VT =
Op.getValueType();
9846 auto Flags =
Op->getFlags();
9857 switch (
Op.getOpcode()) {
9883 EVT VT =
Op.getValueType();
9899 DAGCombinerInfo &
DCI)
const {
9900 EVT VT =
N->getValueType(0);
9915 if (
DCI.isAfterLegalizeDAG() &&
SrcVT == MVT::i32) {
9918 DCI.AddToWorklist(
Cvt.getNode());
9933 DAGCombinerInfo &
DCI)
const {
9959 if (
SignOp.getValueType() != MVT::f64)
9995 DAGCombinerInfo &
DCI)
const {
10022 Type *Ty =
MemVT.getTypeForEVT(*
DCI.DAG.getContext());
10025 AM.HasBaseReg =
true;
10026 AM.BaseOffs =
Offset.getSExtValue();
10031 EVT VT =
N->getValueType(0);
10048 switch (
N->getOpcode()) {
10059 DAGCombinerInfo &
DCI)
const {
10069 N->getMemoryVT(),
DCI);
10082 return (Opc ==
ISD::AND && (Val == 0 || Val == 0xffffffff)) ||
10083 (Opc ==
ISD::OR && (Val == 0xffffffff || Val == 0)) ||
10092SDValue SITargetLowering::splitBinaryBitConstantOp(
10093 DAGCombinerInfo &
DCI,
10104 (
CRHS->hasOneUse() && !
TII->isInlineConstant(
CRHS->getAPIntValue()))) {
10117 if (V.getValueType() != MVT::i1)
10119 switch (V.getOpcode()) {
10156 assert(V.getValueSizeInBits() == 32);
10158 if (V.getNumOperands() != 2)
10167 switch (V.getOpcode()) {
10184 return uint32_t((0x030201000c0c0c0cull <<
C) >> 32);
10190 return uint32_t(0x0c0c0c0c03020100ull >>
C);
10197 DAGCombinerInfo &
DCI)
const {
10198 if (
DCI.isBeforeLegalize())
10202 EVT VT =
N->getValueType(0);
10208 if (VT == MVT::i64 &&
CRHS) {
10214 if (
CRHS && VT == MVT::i32) {
10224 unsigned Shift =
CShift->getZExtValue();
10225 unsigned NB =
CRHS->getAPIntValue().countr_zero();
10227 if ((
Offset & (Bits - 1)) == 0) {
10230 LHS->getOperand(0),
10251 Sel = (LHS.getConstantOperandVal(2) &
Sel) | (~
Sel & 0x0c0c0c0c);
10266 if (
Y.getOpcode() !=
ISD::FABS ||
Y.getOperand(0) !=
X ||
10271 if (
X != LHS.getOperand(1))
10276 if (!
C1 || !
C1->isInfinity() ||
C1->isNegative())
10309 (RHS.getOperand(0) == LHS.getOperand(0) &&
10310 LHS.getOperand(0) == LHS.getOperand(1))) {
10322 if (VT == MVT::i32 &&
10334 if (VT == MVT::i32 && LHS.hasOneUse() && RHS.hasOneUse() &&
10335 N->isDivergent() &&
TII->pseudoToMCOpcode(AMDGPU::V_PERM_B32_e64) != -1) {
10362 for (
unsigned I = 0;
I < 32;
I += 8) {
10365 Mask &= (0x0c <<
I) & 0xffffffff;
10374 LHS.getOperand(0), RHS.getOperand(0),
10423static const std::optional<ByteProvider<SDValue>>
10425 unsigned Depth = 0) {
10428 return std::nullopt;
10430 switch (
Op->getOpcode()) {
10432 if (
Op->getOperand(0).getScalarValueSizeInBits() != 32)
10433 return std::nullopt;
10440 return std::nullopt;
10445 return std::nullopt;
10453 if (
Op.getScalarValueSizeInBits() != 32)
10454 return std::nullopt;
10468static const std::optional<ByteProvider<SDValue>>
10470 unsigned StartingIndex = 0) {
10474 return std::nullopt;
10476 unsigned BitWidth =
Op.getScalarValueSizeInBits();
10478 return std::nullopt;
10481 switch (
Op.getOpcode()) {
10486 return std::nullopt;
10490 return std::nullopt;
10493 if (!LHS->isConstantZero() && !RHS->isConstantZero())
10494 return std::nullopt;
10495 if (!LHS || LHS->isConstantZero())
10497 if (!RHS || RHS->isConstantZero())
10499 return std::nullopt;
10505 return std::nullopt;
10511 if ((IndexMask & BitMask) != IndexMask) {
10514 if (IndexMask & BitMask)
10515 return std::nullopt;
10525 return std::nullopt;
10529 return std::nullopt;
10533 return std::nullopt;
10550 return std::nullopt;
10554 return std::nullopt;
10564 Depth + 1, StartingIndex);
10572 return std::nullopt;
10577 ? std::optional<ByteProvider<SDValue>>(
10586 return std::nullopt;
10594 return std::nullopt;
10601 return std::nullopt;
10609 ? std::optional<ByteProvider<SDValue>>(
10618 return std::nullopt;
10623 Depth + 1, StartingIndex);
10625 return std::nullopt;
10639 return !
OpVT.isVector() &&
OpVT.getSizeInBits() == 16;
10646 auto MemVT = L->getMemoryVT();
10647 return !
MemVT.isVector() &&
MemVT.getSizeInBits() == 16;
10659 int Low8 = Mask & 0xff;
10660 int Hi8 = (Mask & 0xff00) >> 8;
10683 assert(
Op.getValueType().getSizeInBits() == 32);
10703 DAGCombinerInfo &
DCI)
const {
10708 EVT VT =
N->getValueType(0);
10709 if (VT == MVT::i1) {
10713 SDValue Src = LHS.getOperand(0);
10714 if (Src != RHS.getOperand(0))
10742 Sel |= LHS.getConstantOperandVal(2);
10750 if (VT == MVT::i32 && LHS.hasOneUse() && RHS.hasOneUse() &&
10751 N->isDivergent() &&
TII->pseudoToMCOpcode(AMDGPU::V_PERM_B32_e64) != -1) {
10758 !
OrUse->getValueType(0).isVector())
10763 if (!
VUse->getValueType(0).isVector())
10810 LHS.getOperand(0), RHS.getOperand(0),
10819 for (
int i = 0; i < 4; i++) {
10821 std::optional<ByteProvider<SDValue>>
P =
10824 if (!
P ||
P->isConstantZero())
10835 for (
size_t i = 0; i <
PermNodes.size(); i++) {
10861 if (
Op == OtherOp) {
10881 if (VT != MVT::i64 ||
DCI.isBeforeLegalizeOps())
10896 if (
SrcVT == MVT::i32) {
10902 DCI.AddToWorklist(
LowOr.getNode());
10915 N->getOperand(0),
CRHS))
10923 DAGCombinerInfo &
DCI)
const {
10933 EVT VT =
N->getValueType(0);
10934 if (
CRHS && VT == MVT::i64) {
10942 if (LHS.getOpcode() ==
ISD::SELECT && VT == MVT::i32) {
10944 if (
CRHS &&
CRHS->getAPIntValue().isSignMask() &&
10965 DAGCombinerInfo &
DCI)
const {
10970 EVT VT =
N->getValueType(0);
10971 if (VT != MVT::i32)
10975 if (Src.getValueType() != MVT::i16)
10981SDValue SITargetLowering::performSignExtendInRegCombine(
SDNode *
N,
10982 DAGCombinerInfo &
DCI)
10988 VTSign->getVT() == MVT::i8) ||
10990 VTSign->getVT() == MVT::i16)) &&
11005 Src.getOperand(0).getValueType());
11010 Ops, M->getMemoryVT(),
11011 M->getMemOperand());
11019 DAGCombinerInfo &
DCI)
const {
11025 if (
CMask->isZero())
11029 if (
N->getOperand(0).isUndef())
11036 DAGCombinerInfo &
DCI)
const {
11037 EVT VT =
N->getValueType(0);
11041 return DCI.DAG.getConstantFP(
11062 unsigned Opcode =
Op.getOpcode();
11067 const auto &
F = CFP->getValueAPF();
11068 if (
F.isNaN() &&
F.isSignaling())
11070 if (!
F.isDenormal())
11128 return Op.getValueType().getScalarType() != MVT::f16;
11189 if (
Op.getValueType() == MVT::i16) {
11191 if (
TruncSrc.getValueType() == MVT::i32 &&
11193 TruncSrc.getOperand(0).getValueType() == MVT::v2f16) {
11200 unsigned IntrinsicID
11203 switch (IntrinsicID) {
11204 case Intrinsic::amdgcn_cvt_pkrtz:
11205 case Intrinsic::amdgcn_cubeid:
11206 case Intrinsic::amdgcn_frexp_mant:
11207 case Intrinsic::amdgcn_fdot2:
11208 case Intrinsic::amdgcn_rcp:
11209 case Intrinsic::amdgcn_rsq:
11210 case Intrinsic::amdgcn_rsq_clamp:
11211 case Intrinsic::amdgcn_rcp_legacy:
11212 case Intrinsic::amdgcn_rsq_legacy:
11213 case Intrinsic::amdgcn_trig_preop:
11214 case Intrinsic::amdgcn_log:
11215 case Intrinsic::amdgcn_exp2:
11236 unsigned Opcode =
MI->getOpcode();
11238 if (Opcode == AMDGPU::G_FCANONICALIZE)
11241 std::optional<FPValueAndVReg>
FCR;
11244 if (
FCR->Value.isSignaling())
11246 if (!
FCR->Value.isDenormal())
11257 case AMDGPU::G_FADD:
11258 case AMDGPU::G_FSUB:
11259 case AMDGPU::G_FMUL:
11260 case AMDGPU::G_FCEIL:
11261 case AMDGPU::G_FFLOOR:
11262 case AMDGPU::G_FRINT:
11263 case AMDGPU::G_FNEARBYINT:
11264 case AMDGPU::G_INTRINSIC_FPTRUNC_ROUND:
11265 case AMDGPU::G_INTRINSIC_TRUNC:
11266 case AMDGPU::G_INTRINSIC_ROUNDEVEN:
11267 case AMDGPU::G_FMA:
11268 case AMDGPU::G_FMAD:
11269 case AMDGPU::G_FSQRT:
11270 case AMDGPU::G_FDIV:
11271 case AMDGPU::G_FREM:
11272 case AMDGPU::G_FPOW:
11273 case AMDGPU::G_FPEXT:
11274 case AMDGPU::G_FLOG:
11275 case AMDGPU::G_FLOG2:
11276 case AMDGPU::G_FLOG10:
11277 case AMDGPU::G_FPTRUNC:
11278 case AMDGPU::G_AMDGPU_RCP_IFLAG:
11279 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE0:
11280 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:
11281 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
11282 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
11284 case AMDGPU::G_FNEG:
11285 case AMDGPU::G_FABS:
11286 case AMDGPU::G_FCOPYSIGN:
11288 case AMDGPU::G_FMINNUM:
11289 case AMDGPU::G_FMAXNUM:
11290 case AMDGPU::G_FMINNUM_IEEE:
11291 case AMDGPU::G_FMAXNUM_IEEE: {
11299 case AMDGPU::G_BUILD_VECTOR:
11304 case AMDGPU::G_INTRINSIC:
11305 switch (
MI->getIntrinsicID()) {
11306 case Intrinsic::amdgcn_fmul_legacy:
11307 case Intrinsic::amdgcn_fmad_ftz:
11308 case Intrinsic::amdgcn_sqrt:
11309 case Intrinsic::amdgcn_fmed3:
11310 case Intrinsic::amdgcn_sin:
11311 case Intrinsic::amdgcn_cos:
11312 case Intrinsic::amdgcn_log:
11313 case Intrinsic::amdgcn_exp2:
11314 case Intrinsic::amdgcn_log_clamp:
11315 case Intrinsic::amdgcn_rcp:
11316 case Intrinsic::amdgcn_rcp_legacy:
11317 case Intrinsic::amdgcn_rsq:
11318 case Intrinsic::amdgcn_rsq_clamp:
11319 case Intrinsic::amdgcn_rsq_legacy:
11320 case Intrinsic::amdgcn_div_scale:
11321 case Intrinsic::amdgcn_div_fmas:
11322 case Intrinsic::amdgcn_div_fixup:
11323 case Intrinsic::amdgcn_fract:
11324 case Intrinsic::amdgcn_ldexp:
11325 case Intrinsic::amdgcn_cvt_pkrtz:
11326 case Intrinsic::amdgcn_cubeid:
11327 case Intrinsic::amdgcn_cubema:
11328 case Intrinsic::amdgcn_cubesc:
11329 case Intrinsic::amdgcn_cubetc:
11330 case Intrinsic::amdgcn_frexp_mant:
11331 case Intrinsic::amdgcn_fdot2:
11332 case Intrinsic::amdgcn_trig_preop:
11347SDValue SITargetLowering::getCanonicalConstantFP(
11350 if (
C.isDenormal()) {
11364 if (
C.isSignaling()) {
11386SDValue SITargetLowering::performFCanonicalizeCombine(
11388 DAGCombinerInfo &
DCI)
const {
11391 EVT VT =
N->getValueType(0);
11400 EVT VT =
N->getValueType(0);
11401 return getCanonicalConstantFP(DAG,
SDLoc(
N), VT, CFP->getValueAPF());
11420 for (
unsigned I = 0;
I != 2; ++
I) {
11424 CFP->getValueAPF());
11425 }
else if (
Op.isUndef()) {
11514 if (
MaxK->getAPIntValue().sge(
MinK->getAPIntValue()))
11517 if (
MaxK->getAPIntValue().uge(
MinK->getAPIntValue()))
11521 EVT VT =
MinK->getValueType(0);
11523 if (VT == MVT::i32 || (VT == MVT::i16 && Subtarget->
hasMed3_16()))
11558 if (
K0->getValueAPF() >
K1->getValueAPF())
11566 if (Info->getMode().DX10Clamp) {
11570 if (
K1->isExactlyValue(1.0) &&
K0->isExactlyValue(0.0))
11575 if (VT == MVT::f32 || (VT == MVT::f16 && Subtarget->
hasMed3_16())) {
11586 if ((!
K0->hasOneUse() ||
11587 TII->isInlineConstant(
K0->getValueAPF().bitcastToAPInt())) &&
11588 (!
K1->hasOneUse() ||
11589 TII->isInlineConstant(
K1->getValueAPF().bitcastToAPInt()))) {
11599 DAGCombinerInfo &
DCI)
const {
11602 EVT VT =
N->getValueType(0);
11603 unsigned Opc =
N->getOpcode();
11612 (VT == MVT::i32 || VT == MVT::f32 ||
11613 ((VT == MVT::f16 || VT == MVT::i16) && Subtarget->
hasMin3Max3_16()))) {
11620 N->getValueType(0),
11633 N->getValueType(0),
11669 (VT == MVT::f32 || VT == MVT::f64 ||
11673 if (
SDValue Res = performFPMed3ImmCombine(DAG,
SDLoc(
N), Op0, Op1))
11684 return (
CA->isExactlyValue(0.0) && CB->isExactlyValue(1.0)) ||
11685 (
CA->isExactlyValue(1.0) && CB->isExactlyValue(0.0));
11694 DAGCombinerInfo &
DCI)
const {
11695 EVT VT =
N->getValueType(0);
11718 if (Info->getMode().DX10Clamp) {
11738 DAGCombinerInfo &
DCI)
const {
11742 return DCI.DAG.getUNDEF(
N->getValueType(0));
11755 unsigned VecSize = EltSize *
NumElem;
11758 if (VecSize <= 64 && EltSize < 32)
11771 unsigned NumInsts =
NumElem +
11772 ((EltSize + 31) / 32) *
NumElem ;
11777 return NumInsts <= 16;
11781 return NumInsts <= 15;
11792 unsigned EltSize =
EltVT.getSizeInBits();
11799SDValue SITargetLowering::performExtractVectorEltCombine(
11851 DCI.AddToWorklist(
Elt0.getNode());
11852 DCI.AddToWorklist(
Elt1.getNode());
11874 if (!
DCI.isBeforeLegalize())
11882 VecSize > 32 && VecSize % 32 == 0 &&
Idx) {
11895 DCI.AddToWorklist(
Elt.getNode());
11898 DCI.AddToWorklist(
Srl.getNode());
11916SITargetLowering::performInsertVectorEltCombine(
SDNode *
N,
11917 DAGCombinerInfo &
DCI)
const {
11948 Src.getOperand(0).getValueType() == MVT::f16) {
11949 return Src.getOperand(0);
11953 APFloat Val = CFP->getValueAPF();
11964 DAGCombinerInfo &
DCI)
const {
11966 "combine only useful on gfx8");
11969 EVT VT =
N->getValueType(0);
11970 if (VT != MVT::f16)
12008unsigned SITargetLowering::getFusedOpcode(
const SelectionDAG &DAG,
12015 if (((VT == MVT::f32 &&
12017 (VT == MVT::f16 && Subtarget->
hasMadF16() &&
12025 N1->getFlags().hasAllowContract())) &&
12037 EVT VT =
N->getValueType(0);
12038 if (VT != MVT::i32 && VT != MVT::i64)
12044 unsigned Opc =
N->getOpcode();
12089 DAGCombinerInfo &
DCI)
const {
12093 EVT VT =
N->getValueType(0);
12103 if (!
N->isDivergent() && Subtarget->
hasSMulHi())
12110 if (LHS.getOpcode() !=
ISD::MUL) {
12154 if (VT != MVT::i64) {
12201 if (VT != MVT::i64)
12207 DAGCombinerInfo &
DCI)
const {
12209 EVT VT =
N->getValueType(0);
12223 if (
SDValue V = reassociateScalarOps(
N, DAG)) {
12227 if (VT != MVT::i32 || !
DCI.isAfterLegalizeDAG())
12232 unsigned Opc = LHS.getOpcode();
12237 Opc = RHS.getOpcode();
12243 auto Cond = RHS.getOperand(0);
12251 return DAG.
getNode(Opc, SL, VTList, Args);
12265 DAGCombinerInfo &
DCI)
const {
12267 EVT VT =
N->getValueType(0);
12269 if (VT != MVT::i32)
12278 unsigned Opc = RHS.getOpcode();
12284 auto Cond = RHS.getOperand(0);
12292 return DAG.
getNode(Opc, SL, VTList, Args);
12299 if (!
C || !
C->isZero())
12307SDValue SITargetLowering::performAddCarrySubCarryCombine(
SDNode *
N,
12308 DAGCombinerInfo &
DCI)
const {
12310 if (
N->getValueType(0) != MVT::i32)
12321 unsigned LHSOpc = LHS.getOpcode();
12322 unsigned Opc =
N->getOpcode();
12325 SDValue Args[] = { LHS.getOperand(0), LHS.getOperand(1),
N->getOperand(2) };
12332 DAGCombinerInfo &
DCI)
const {
12337 EVT VT =
N->getValueType(0);
12349 if (
A == LHS.getOperand(1)) {
12350 unsigned FusedOp = getFusedOpcode(DAG,
N, LHS.getNode());
12361 if (
A == RHS.getOperand(1)) {
12362 unsigned FusedOp = getFusedOpcode(DAG,
N, RHS.getNode());
12374 DAGCombinerInfo &
DCI)
const {
12380 EVT VT =
N->getValueType(0);
12393 if (
A == LHS.getOperand(1)) {
12394 unsigned FusedOp = getFusedOpcode(DAG,
N, LHS.getNode());
12408 if (
A == RHS.getOperand(1)) {
12409 unsigned FusedOp = getFusedOpcode(DAG,
N, RHS.getNode());
12421 DAGCombinerInfo &
DCI)
const {
12423 EVT VT =
N->getValueType(0);
12445 (
N->getFlags().hasAllowContract() &&
12446 FMA->getFlags().hasAllowContract())) {
12483 if (
Vec1.getValueType() != MVT::v2f16 ||
Vec2.getValueType() != MVT::v2f16)
12496 DAGCombinerInfo &
DCI)
const {
12502 EVT VT = LHS.getValueType();
12521 if ((
CRHS->isAllOnes() &&
12527 if ((
CRHS->isAllOnes() &&
12539 LHS.getConstantOperandVal(1) != LHS.getConstantOperandVal(2) &&
12546 const APInt &CT = LHS.getConstantOperandAPInt(1);
12547 const APInt &
CF = LHS.getConstantOperandAPInt(2);
12559 if (VT != MVT::f32 && VT != MVT::f64 &&
12573 if (
APF.isInfinity() && !
APF.isNegative()) {
12592 DAGCombinerInfo &
DCI)
const {
12614 unsigned ShiftOffset = 8 *
Offset;
12616 ShiftOffset -=
C->getZExtValue();
12618 ShiftOffset +=
C->getZExtValue();
12620 if (ShiftOffset < 32 && (ShiftOffset % 8) == 0) {
12633 DCI.AddToWorklist(
N);
12646 DAGCombinerInfo &
DCI)
const {
12656 return DCI.DAG.getConstantFP(Zero,
SDLoc(
N),
N->getValueType(0));
12659 APFloat One(
F.getSemantics(),
"1.0");
12661 return DCI.DAG.getConstantFP(One,
SDLoc(
N),
N->getValueType(0));
12671 switch (
N->getOpcode()) {
12673 return performAddCombine(
N,
DCI);
12675 return performSubCombine(
N,
DCI);
12678 return performAddCarrySubCarryCombine(
N,
DCI);
12680 return performFAddCombine(
N,
DCI);
12682 return performFSubCombine(
N,
DCI);
12684 return performSetCCCombine(
N,
DCI);
12695 return performMinMaxCombine(
N,
DCI);
12697 return performFMACombine(
N,
DCI);
12699 return performAndCombine(
N,
DCI);
12701 return performOrCombine(
N,
DCI);
12703 return performXorCombine(
N,
DCI);
12705 return performZeroExtendCombine(
N,
DCI);
12707 return performSignExtendInRegCombine(
N ,
DCI);
12709 return performClassCombine(
N,
DCI);
12711 return performFCanonicalizeCombine(
N,
DCI);
12713 return performRcpCombine(
N,
DCI);
12728 return performUCharToFloatCombine(
N,
DCI);
12730 return performFCopySignCombine(
N,
DCI);
12735 return performCvtF32UByteNCombine(
N,
DCI);
12737 return performFMed3Combine(
N,
DCI);
12739 return performCvtPkRTZCombine(
N,
DCI);
12741 return performClampCombine(
N,
DCI);
12744 EVT VT =
N->getValueType(0);
12747 if (VT == MVT::v2i16 || VT == MVT::v2f16) {
12751 if (
EltVT == MVT::f16)
12761 return performExtractVectorEltCombine(
N,
DCI);
12763 return performInsertVectorEltCombine(
N,
DCI);
12765 return performFPRoundCombine(
N,
DCI);
12772 if (!
DCI.isBeforeLegalize()) {
12774 return performMemSDNodeCombine(MemNode,
DCI);
12787 default:
return ~0u;
12788 case AMDGPU::sub0:
return 0;
12789 case AMDGPU::sub1:
return 1;
12790 case AMDGPU::sub2:
return 2;
12791 case AMDGPU::sub3:
return 3;
12792 case AMDGPU::sub4:
return 4;
12799 unsigned Opcode =
Node->getMachineOpcode();
12836 if (
I.getUse().getResNo() != 0)
12840 if (!
I->isMachineOpcode() ||
12841 I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
12901 "failed to find equivalent MIMG op");
12909 MVT SVT =
Node->getValueType(0).getVectorElementType().getSimpleVT();
12937 for (
unsigned i = 0,
Idx = AMDGPU::sub0; i < 5; ++i) {
12951 case AMDGPU::sub0:
Idx = AMDGPU::sub1;
break;
12952 case AMDGPU::sub1:
Idx = AMDGPU::sub2;
break;
12953 case AMDGPU::sub2:
Idx = AMDGPU::sub3;
break;
12954 case AMDGPU::sub3:
Idx = AMDGPU::sub4;
break;
12964 Op =
Op.getOperand(0);
12984 MRI.createVirtualRegister(&AMDGPU::VReg_1RegClass), MVT::i1);
12992 VReg,
ToVReg.getValue(1));
13000 for (
unsigned i = 0; i < Node->getNumOperands(); ++i) {
13002 Ops.push_back(Node->getOperand(i));
13008 Node->getOperand(i).getValueType(),
13009 Node->getOperand(i)), 0));
13020 unsigned Opcode = Node->getMachineOpcode();
13022 if (
TII->isMIMG(Opcode) && !
TII->get(Opcode).mayStore() &&
13023 !
TII->isGather4(Opcode) &&
13025 return adjustWritemask(Node, DAG);
13028 if (Opcode == AMDGPU::INSERT_SUBREG ||
13029 Opcode == AMDGPU::REG_SEQUENCE) {
13035 case AMDGPU::V_DIV_SCALE_F32_e64:
13036 case AMDGPU::V_DIV_SCALE_F64_e64: {
13046 (Src0 == Src1 || Src0 == Src2))
13081 Ops.push_back(
ImpDef.getValue(1));
13110 unsigned D16Val = D16 ? D16->getImm() : 0;
13159 NewDst =
MRI.createVirtualRegister(
TII->getOpRegClass(
MI, DstIdx));
13177 MI.tieOperands(DstIdx,
MI.getNumOperands() - 1);
13188 if (
TII->isVOP3(
MI.getOpcode())) {
13190 TII->legalizeOperandsVOP3(
MRI,
MI);
13195 if (!
MI.getDesc().operands().empty()) {
13196 unsigned Opc =
MI.getOpcode();
13203 if (!
Op.isReg() || !
Op.getReg().isVirtual())
13205 auto *RC =
TRI->getRegClassForReg(
MRI,
Op.getReg());
13206 if (!
TRI->hasAGPRs(RC))
13208 auto *Src =
MRI.getUniqueVRegDef(
Op.getReg());
13209 if (!Src || !Src->isCopy() ||
13210 !
TRI->isSGPRReg(
MRI, Src->getOperand(1).getReg()))
13212 auto *NewRC =
TRI->getEquivalentVGPRClass(RC);
13216 MRI.setRegClass(
Op.getReg(), NewRC);
13220 if (
auto *Src2 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src2)) {
13221 if (Src2->isReg() && Src2->getReg().isVirtual()) {
13222 auto *RC =
TRI->getRegClassForReg(
MRI, Src2->getReg());
13223 if (
TRI->isVectorSuperClass(RC)) {
13224 auto *NewRC =
TRI->getEquivalentAGPRClass(RC);
13225 MRI.setRegClass(Src2->getReg(), NewRC);
13226 if (Src2->isTied())
13227 MRI.setRegClass(
MI.getOperand(0).getReg(), NewRC);
13236 if (
TII->isMIMG(
MI)) {
13237 if (!
MI.mayStore())
13239 TII->enforceOperandRCAlignment(
MI, AMDGPU::OpName::vaddr);
13266 MVT::v2i32,
Ops0), 0);
13318std::pair<unsigned, const TargetRegisterClass *>
13325 if (Constraint.
size() == 1) {
13327 switch (Constraint[0]) {
13334 RC = &AMDGPU::SReg_32RegClass;
13337 RC = &AMDGPU::SGPR_64RegClass;
13342 return std::pair(0U,
nullptr);
13349 RC = &AMDGPU::VGPR_32RegClass;
13354 return std::pair(0U,
nullptr);
13363 RC = &AMDGPU::AGPR_32RegClass;
13368 return std::pair(0U,
nullptr);
13377 return std::pair(0U, RC);
13382 if (
RegName.consume_front(
"v")) {
13383 RC = &AMDGPU::VGPR_32RegClass;
13384 }
else if (
RegName.consume_front(
"s")) {
13385 RC = &AMDGPU::SGPR_32RegClass;
13386 }
else if (
RegName.consume_front(
"a")) {
13387 RC = &AMDGPU::AGPR_32RegClass;
13392 if (
RegName.consume_front(
"[")) {
13402 RC =
TRI->getVGPRClassForBitWidth(Width);
13404 RC =
TRI->getSGPRClassForBitWidth(Width);
13406 RC =
TRI->getAGPRClassForBitWidth(Width);
13408 Reg =
TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, RC);
13409 return std::pair(Reg, RC);
13422 Ret.second =
TRI->getPhysRegBaseClass(Ret.first);
13428 if (Constraint.
size() == 1) {
13429 switch (Constraint[0]) {
13438 }
else if (Constraint ==
"DA" ||
13439 Constraint ==
"DB") {
13447 if (Constraint.
size() == 1) {
13448 switch (Constraint[0]) {
13470 std::string &Constraint,
13471 std::vector<SDValue> &Ops,
13486 unsigned Size =
Op.getScalarValueSizeInBits();
13494 Val =
C->getSExtValue();
13498 Val =
C->getValueAPF().bitcastToAPInt().getSExtValue();
13504 if (
Op.getOperand(0).isUndef() ||
Op.getOperand(1).isUndef())
13507 Val =
C->getSExtValue();
13511 Val =
C->getValueAPF().bitcastToAPInt().getSExtValue();
13520 const std::string &Constraint,
13522 if (Constraint.size() == 1) {
13523 switch (Constraint[0]) {
13538 }
else if (Constraint.size() == 2) {
13539 if (Constraint ==
"DA") {
13540 int64_t
HiBits =
static_cast<int32_t
>(Val >> 32);
13541 int64_t
LoBits =
static_cast<int32_t
>(Val);
13545 if (Constraint ==
"DB") {
13554 unsigned MaxSize)
const {
13555 unsigned Size = std::min<unsigned>(
Op.getScalarValueSizeInBits(), MaxSize);
13567 case AMDGPU::VReg_64RegClassID:
13568 return AMDGPU::VReg_64_Align2RegClassID;
13569 case AMDGPU::VReg_96RegClassID:
13570 return AMDGPU::VReg_96_Align2RegClassID;
13571 case AMDGPU::VReg_128RegClassID:
13572 return AMDGPU::VReg_128_Align2RegClassID;
13573 case AMDGPU::VReg_160RegClassID:
13574 return AMDGPU::VReg_160_Align2RegClassID;
13575 case AMDGPU::VReg_192RegClassID:
13576 return AMDGPU::VReg_192_Align2RegClassID;
13577 case AMDGPU::VReg_224RegClassID:
13578 return AMDGPU::VReg_224_Align2RegClassID;
13579 case AMDGPU::VReg_256RegClassID:
13580 return AMDGPU::VReg_256_Align2RegClassID;
13581 case AMDGPU::VReg_288RegClassID:
13582 return AMDGPU::VReg_288_Align2RegClassID;
13583 case AMDGPU::VReg_320RegClassID:
13584 return AMDGPU::VReg_320_Align2RegClassID;
13585 case AMDGPU::VReg_352RegClassID:
13586 return AMDGPU::VReg_352_Align2RegClassID;
13587 case AMDGPU::VReg_384RegClassID:
13588 return AMDGPU::VReg_384_Align2RegClassID;
13589 case AMDGPU::VReg_512RegClassID:
13590 return AMDGPU::VReg_512_Align2RegClassID;
13591 case AMDGPU::VReg_1024RegClassID:
13592 return AMDGPU::VReg_1024_Align2RegClassID;
13593 case AMDGPU::AReg_64RegClassID:
13594 return AMDGPU::AReg_64_Align2RegClassID;
13595 case AMDGPU::AReg_96RegClassID:
13596 return AMDGPU::AReg_96_Align2RegClassID;
13597 case AMDGPU::AReg_128RegClassID:
13598 return AMDGPU::AReg_128_Align2RegClassID;
13599 case AMDGPU::AReg_160RegClassID:
13600 return AMDGPU::AReg_160_Align2RegClassID;
13601 case AMDGPU::AReg_192RegClassID:
13602 return AMDGPU::AReg_192_Align2RegClassID;
13603 case AMDGPU::AReg_256RegClassID:
13604 return AMDGPU::AReg_256_Align2RegClassID;
13605 case AMDGPU::AReg_512RegClassID:
13606 return AMDGPU::AReg_512_Align2RegClassID;
13607 case AMDGPU::AReg_1024RegClassID:
13608 return AMDGPU::AReg_1024_Align2RegClassID;
13624 if (Info->isEntryFunction()) {
13633 ? AMDGPU::SGPR_32RegClass.getRegister(
MaxNumSGPRs - 1)
13634 :
TRI->getAlignedHighSGPRForRC(MF, 2,
13635 &AMDGPU::SGPR_64RegClass);
13636 Info->setSGPRForEXECCopy(
SReg);
13638 assert(!
TRI->isSubRegister(Info->getScratchRSrcReg(),
13639 Info->getStackPtrOffsetReg()));
13640 if (Info->getStackPtrOffsetReg() != AMDGPU::SP_REG)
13641 MRI.replaceRegWith(AMDGPU::SP_REG, Info->getStackPtrOffsetReg());
13645 if (Info->getScratchRSrcReg() != AMDGPU::PRIVATE_RSRC_REG)
13646 MRI.replaceRegWith(AMDGPU::PRIVATE_RSRC_REG, Info->getScratchRSrcReg());
13648 if (Info->getFrameOffsetReg() != AMDGPU::FP_REG)
13649 MRI.replaceRegWith(AMDGPU::FP_REG, Info->getFrameOffsetReg());
13651 Info->limitOccupancy(MF);
13653 if (ST.isWave32() && !MF.
empty()) {
13654 for (
auto &
MBB : MF) {
13655 for (
auto &
MI :
MBB) {
13656 TII->fixImplicitOperands(
MI);
13666 if (ST.needsAlignedVGPRs()) {
13667 for (
unsigned I = 0,
E =
MRI.getNumVirtRegs();
I !=
E; ++
I) {
13685 unsigned Depth)
const {
13687 unsigned Opc =
Op.getOpcode();
13692 case Intrinsic::amdgcn_mbcnt_lo:
13693 case Intrinsic::amdgcn_mbcnt_hi: {
13703 unsigned Size =
Op.getValueType().getSizeInBits();
13728 unsigned MaxValue =
13737 switch (
MI->getOpcode()) {
13738 case AMDGPU::G_INTRINSIC: {
13739 switch (
MI->getIntrinsicID()) {
13740 case Intrinsic::amdgcn_workitem_id_x:
13743 case Intrinsic::amdgcn_workitem_id_y:
13746 case Intrinsic::amdgcn_workitem_id_z:
13749 case Intrinsic::amdgcn_mbcnt_lo:
13750 case Intrinsic::amdgcn_mbcnt_hi: {
13752 unsigned Size =
MRI.getType(R).getSizeInBits();
13756 case Intrinsic::amdgcn_groupstaticsize: {
13767 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
13770 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
13773 case AMDGPU::G_AMDGPU_SMED3:
13774 case AMDGPU::G_AMDGPU_UMED3: {
13775 auto [Dst, Src0, Src1, Src2] =
MI->getFirst4Regs();
13802 unsigned Depth)
const {
13804 switch (
MI->getOpcode()) {
13805 case AMDGPU::G_INTRINSIC:
13806 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
13842 if (Header->getAlignment() != PrefAlign)
13843 return Header->getAlignment();
13845 unsigned LoopSize = 0;
13853 LoopSize +=
TII->getInstSizeInBytes(
MI);
13854 if (LoopSize > 192)
13859 if (LoopSize <= 64)
13862 if (LoopSize <= 128)
13869 auto I = Exit->getFirstNonDebugInstr();
13870 if (
I != Exit->end() &&
I->getOpcode() == AMDGPU::S_INST_PREFETCH)
13881 std::prev(
PreTerm)->getOpcode() != AMDGPU::S_INST_PREFETCH)
13887 ExitHead->getOpcode() != AMDGPU::S_INST_PREFETCH)
13900 N =
N->getOperand(0).getNode();
13911 switch (
N->getOpcode()) {
13919 if (Reg.isPhysical() ||
MRI.isLiveIn(Reg))
13920 return !
TRI->isSGPRReg(
MRI, Reg);
13926 return !
TRI->isSGPRReg(
MRI, Reg);
13930 unsigned AS = L->getAddressSpace();
13967 return A->readMem() &&
A->writeMem();
13988 switch (Ty.getScalarSizeInBits()) {
14002 unsigned Depth)
const {
14007 if (Info->getMode().DX10Clamp)
14031 return F->getFnAttribute(
"amdgpu-unsafe-fp-atomics").getValueAsString() !=
14037 unsigned AS =
RMW->getPointerAddressSpace();
14041 auto SSID =
RMW->getSyncScopeID();
14047 Ctx.getSyncScopeNames(SSNs);
14048 auto MemScope = SSNs[
RMW->getSyncScopeID()].empty()
14050 : SSNs[
RMW->getSyncScopeID()];
14053 <<
"Hardware instruction generated for atomic "
14054 <<
RMW->getOperationName(
RMW->getOperation())
14055 <<
" operation at memory scope " <<
MemScope
14056 <<
" due to an unsafe request.";
14063 SSID ==
RMW->getContext().getOrInsertSyncScopeID(
"one-as");
14065 switch (
RMW->getOperation()) {
14069 if (Ty->isHalfTy())
14072 if (!Ty->isFloatTy() && (!Subtarget->
hasGFX90AInsts() || !Ty->isDoubleTy()))
14125 if (!Ty->isDoubleTy())
14131 return RMW->getFunction()
14132 ->getFnAttribute(
"amdgpu-unsafe-fp-atomics")
14133 .getValueAsString() ==
"true"
14147 if (
RMW->getType()->isFloatTy() &&
14189 if (RC == &AMDGPU::VReg_1RegClass && !isDivergent)
14191 : &AMDGPU::SReg_32RegClass;
14192 if (!
TRI->isSGPRClass(RC) && !isDivergent)
14193 return TRI->getEquivalentSGPRClass(RC);
14194 else if (
TRI->isSGPRClass(RC) && isDivergent)
14195 return TRI->getEquivalentVGPRClass(RC);
14217 if (!Visited.insert(V).second)
14219 bool Result =
false;
14220 for (
const auto *U : V->users()) {
14222 if (V == U->getOperand(1)) {
14223 switch (Intrinsic->getIntrinsicID()) {
14227 case Intrinsic::amdgcn_if_break:
14228 case Intrinsic::amdgcn_if:
14229 case Intrinsic::amdgcn_else:
14234 if (V == U->getOperand(0)) {
14235 switch (Intrinsic->getIntrinsicID()) {
14239 case Intrinsic::amdgcn_end_cf:
14240 case Intrinsic::amdgcn_loop:
14255 const Value *V)
const {
14257 if (CI->isInlineAsm()) {
14270 SIRI, TC.ConstraintCode, TC.ConstraintVT).second;
14271 if (RC &&
SIRI->isSGPRClass(RC))
14283 for (;
I !=
E; ++
I) {
14307 return MRI.hasOneNonDBGUse(N0);
14313 if (
I.getMetadata(
"amdgpu.noclobber"))
14323 if (!Def->isMachineOpcode())
14334 PhysReg = AMDGPU::SCC;
14336 TRI->getMinimalPhysRegClass(PhysReg, Def->getSimpleValueType(ResNo));
14345 "target should have atomic fadd instructions");
14348 "generic atomicrmw expansion only supports FP32 operand in flat "
14351 "only fadd is supported for now");
14423 for (
auto &
P : MDs)
14424 OldVal->setMetadata(
P.first,
P.second);
14428 std::prev(BB->
end())->eraseFromParent();
14429 Builder.SetInsertPoint(BB);
14433 CallInst *
IsShared = Builder.CreateIntrinsic(Intrinsic::amdgcn_is_shared, {},
14434 {
Addr},
nullptr,
"is.shared");
14441 Builder.CreateBr(
PhiBB);
14444 CallInst *IsPrivate = Builder.CreateIntrinsic(
14445 Intrinsic::amdgcn_is_private, {}, {
Addr},
nullptr,
"is.private");
14452 Builder.CreateLoad(ValTy,
CastToPrivate,
"loaded.private");
14455 Builder.CreateBr(
PhiBB);
14461 Builder.CreateBr(
PhiBB);
14463 Builder.SetInsertPoint(
PhiBB);
14464 PHINode *
Loaded = Builder.CreatePHI(ValTy, 3,
"loaded.phi");
14468 Builder.CreateBr(ExitBB);
14486 LoadInst *LI = Builder.CreateAlignedLoad(
unsigned const MachineRegisterInfo * MRI
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static constexpr std::pair< ImplicitArgumentMask, StringLiteral > ImplicitAttrs[]
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE, bool &IsTexFail)
static void packImage16bitOpsToDwords(MachineIRBuilder &B, MachineInstr &MI, SmallVectorImpl< Register > &PackedAddrs, unsigned ArgOffset, const AMDGPU::ImageDimIntrinsicInfo *Intr, bool IsA16, bool IsG16)
Turn a set of s16 typed registers in AddrRegs into a dword sized vector with s16 typed elements.
static bool isKnownNonNull(Register Val, MachineRegisterInfo &MRI, const AMDGPUTargetMachine &TM, unsigned AddrSpace)
Return true if the value is a known valid address, such that a null check is not necessary.
amdgpu Simplify well known AMD library false FunctionCallee Callee
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Provides AMDGPU specific target descriptions.
The AMDGPU TargetMachine interface definition for hw codegen targets.
This file implements a class to represent arbitrary precision integral constant values and operations...
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_ATTRIBUTE_UNUSED
static const std::optional< SDByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional< uint64_t > VectorIndex, unsigned StartingIndex=0)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Given that RA is a live propagate it s liveness to any other values it uses(according to Uses). void DeadArgumentEliminationPass
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
Utilities for dealing with flags related to floating point properties and mode controls.
Provides analysis for querying information about KnownBits during GISel passes.
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
iv Induction Variable Users
static const unsigned MaxDepth
Contains matchers for matching SSA Machine Instructions.
unsigned const TargetRegisterInfo * TRI
Promote Memory to Register
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
const char LLVMTargetMachineRef TM
const SmallVectorImpl< MachineOperand > & Cond
static void r0(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E, int I, uint32_t *Buf)
static void r3(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E, int I, uint32_t *Buf)
static void r2(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E, int I, uint32_t *Buf)
static void r1(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E, int I, uint32_t *Buf)
#define FP_DENORM_FLUSH_NONE
#define FP_DENORM_FLUSH_IN_FLUSH_OUT
static void reservePrivateMemoryRegs(const TargetMachine &TM, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info)
static SDValue adjustLoadValueTypeImpl(SDValue Result, EVT LoadVT, const SDLoc &DL, SelectionDAG &DAG, bool Unpacked)
static MachineBasicBlock * emitIndirectSrc(MachineInstr &MI, MachineBasicBlock &MBB, const GCNSubtarget &ST)
static bool denormalModeIsFlushAllF64F16(const MachineFunction &MF)
static SDValue constructRetValue(SelectionDAG &DAG, MachineSDNode *Result, ArrayRef< EVT > ResultTypes, bool IsTexFail, bool Unpacked, bool IsD16, int DMaskPop, int NumVDataDwords, const SDLoc &DL)
static EVT memVTFromLoadIntrData(Type *Ty, unsigned MaxNumLanes)
static bool isBoolSGPR(SDValue V)
static std::pair< unsigned, int > computeIndirectRegAndOffset(const SIRegisterInfo &TRI, const TargetRegisterClass *SuperRC, unsigned VecReg, int Offset)
static bool denormalModeIsFlushAllF32(const MachineFunction &MF)
static bool fpModeMatchesGlobalFPAtomicMode(const AtomicRMWInst *RMW)
static bool addresses16Bits(int Mask)
static bool isClampZeroToOne(SDValue A, SDValue B)
static unsigned findFirstFreeSGPR(CCState &CCInfo)
static uint32_t getPermuteMask(SDValue V)
static int getAlignedAGPRClassID(unsigned UnalignedClassID)
static void processPSInputArgs(SmallVectorImpl< ISD::InputArg > &Splits, CallingConv::ID CallConv, ArrayRef< ISD::InputArg > Ins, BitVector &Skipped, FunctionType *FType, SIMachineFunctionInfo *Info)
static SDValue getLoadExtOrTrunc(SelectionDAG &DAG, ISD::LoadExtType ExtType, SDValue Op, const SDLoc &SL, EVT VT)
static SDValue strictFPExtFromF16(SelectionDAG &DAG, SDValue Src)
Return the source of an fp_extend from f16 to f32, or a converted FP constant.
static bool bitOpWithConstantIsReducible(unsigned Opc, uint32_t Val)
static cl::opt< bool > DisableLoopAlignment("amdgpu-disable-loop-alignment", cl::desc("Do not align and prefetch loops"), cl::init(false))
static MachineBasicBlock::iterator loadM0FromVGPR(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineInstr &MI, unsigned InitResultReg, unsigned PhiReg, int Offset, bool UseGPRIdxMode, Register &SGPRIdxReg)
static bool isImmConstraint(StringRef Constraint)
static SDValue padEltsToUndef(SelectionDAG &DAG, const SDLoc &DL, EVT CastVT, SDValue Src, int ExtraElts)
static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool hasCFUser(const Value *V, SmallPtrSet< const Value *, 16 > &Visited, unsigned WaveSize)
static EVT memVTFromLoadIntrReturn(Type *Ty, unsigned MaxNumLanes)
static unsigned SubIdx2Lane(unsigned Idx)
Helper function for adjustWritemask.
static bool is16BitScalarOp(SDValue &Operand)
static bool addressMayBeAccessedAsPrivate(const MachineMemOperand *MMO, const SIMachineFunctionInfo &Info)
static MachineBasicBlock * lowerWaveReduce(MachineInstr &MI, MachineBasicBlock &BB, const GCNSubtarget &ST, unsigned Opc)
static bool hasEightBitAccesses(uint64_t PermMask, SDValue &Op, SDValue &OtherOp)
static bool elementPairIsContiguous(ArrayRef< int > Mask, int Elt)
static ArgDescriptor allocateSGPR32InputImpl(CCState &CCInfo, const TargetRegisterClass *RC, unsigned NumArgRegs)
static SDValue getMad64_32(SelectionDAG &DAG, const SDLoc &SL, EVT VT, SDValue N0, SDValue N1, SDValue N2, bool Signed)
bool unsafeFPAtomicsDisabled(Function *F)
static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop(const SIInstrInfo *TII, MachineRegisterInfo &MRI, MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB, const DebugLoc &DL, const MachineOperand &Idx, unsigned InitReg, unsigned ResultReg, unsigned PhiReg, unsigned InitSaveExecReg, int Offset, bool UseGPRIdxMode, Register &SGPRIdxReg)
static bool isFrameIndexOp(SDValue Op)
static ConstantFPSDNode * getSplatConstantFP(SDValue Op)
static void allocateSGPR32Input(CCState &CCInfo, ArgDescriptor &Arg)
static bool vectorEltWillFoldAway(SDValue Op)
static SDValue getSPDenormModeValue(uint32_t SPDenormMode, SelectionDAG &DAG, const SIMachineFunctionInfo *Info, const GCNSubtarget *ST)
static uint32_t getConstantPermuteMask(uint32_t C)
static MachineBasicBlock * emitIndirectDst(MachineInstr &MI, MachineBasicBlock &MBB, const GCNSubtarget &ST)
static void setM0ToIndexFromSGPR(const SIInstrInfo *TII, MachineRegisterInfo &MRI, MachineInstr &MI, int Offset)
static ArgDescriptor allocateVGPR32Input(CCState &CCInfo, unsigned Mask=~0u, ArgDescriptor Arg=ArgDescriptor())
static std::pair< MachineBasicBlock *, MachineBasicBlock * > splitBlockForLoop(MachineInstr &MI, MachineBasicBlock &MBB, bool InstInLoop)
static unsigned getBasePtrIndex(const MemSDNode *N)
MemSDNode::getBasePtr() does not work for intrinsics, which needs to offset by the chain and intrinsic ID.
static void knownBitsForWorkitemID(const GCNSubtarget &ST, GISelKnownBits &KB, KnownBits &Known, unsigned Dim)
static LLVM_ATTRIBUTE_UNUSED bool isCopyFromRegOfInlineAsm(const SDNode *N)
static void allocateFixedSGPRInputImpl(CCState &CCInfo, const TargetRegisterClass *RC, MCRegister Reg)
static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static Register getIndirectSGPRIdx(const SIInstrInfo *TII, MachineRegisterInfo &MRI, MachineInstr &MI, int Offset)
static SDValue emitNonHSAIntrinsicError(SelectionDAG &DAG, const SDLoc &DL, EVT VT)
static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc)
static unsigned getIdxEn(SDValue VIndex)
static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static SDValue buildSMovImm32(SelectionDAG &DAG, const SDLoc &DL, uint64_t Val)
static SDValue getBuildDwordsVector(SelectionDAG &DAG, SDLoc DL, ArrayRef< SDValue > Elts)
static SDNode * findUser(SDValue Value, unsigned Opcode)
Helper function for LowerBRCOND.
static uint64_t clearUnusedBits(uint64_t Val, unsigned Size)
static SDValue getFPTernOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL, EVT VT, SDValue A, SDValue B, SDValue C, SDValue GlueChain, SDNodeFlags Flags)
static SDValue emitRemovedIntrinsicError(SelectionDAG &DAG, const SDLoc &DL, EVT VT)
static SDValue getFPBinOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL, EVT VT, SDValue A, SDValue B, SDValue GlueChain, SDNodeFlags Flags)
static SDValue buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV, const SDLoc &DL, int64_t Offset, EVT PtrVT, unsigned GAFlags=SIInstrInfo::MO_NONE)
static cl::opt< bool > UseDivergentRegisterIndexing("amdgpu-use-divergent-register-indexing", cl::Hidden, cl::desc("Use indirect register addressing for divergent indexes"), cl::init(false))
static const std::optional< ByteProvider< SDValue > > calculateSrcByte(const SDValue Op, uint64_t DestByte, uint64_t SrcIndex=0, unsigned Depth=0)
static void allocateSGPR64Input(CCState &CCInfo, ArgDescriptor &Arg)
SI DAG Lowering interface definition.
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef())
Interface definition for SIRegisterInfo.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics used by various components of the compiler.
#define STATISTIC(VARNAME, DESC)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
static constexpr int Concat[]
static const AMDGPUFunctionArgInfo FixedABIFunctionInfo
static bool isUniformMMO(const MachineMemOperand *MMO)
static std::optional< uint32_t > getLDSKernelIdMetadata(const Function &F)
uint32_t getLDSSize() const
void setDynLDSAlign(const Function &F, const GlobalVariable &GV)
bool isEntryFunction() const
bool hasMadMacF32Insts() const
unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
bool hasMadMixInsts() const
bool has16BitInsts() const
bool isAmdHsaOrMesa(const Function &F) const
bool hasFastFMAF32() const
bool hasTrigReducedRange() const
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
bool hasVOP3PInsts() const
static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG)
SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const
Split a vector load into 2 loads of half the vector.
void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
The SelectionDAGBuilder will automatically promote function arguments with illegal types.
SDValue storeStackInputValue(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain, SDValue ArgVal, int64_t Offset) const
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL, unsigned Opc, SDValue LHS, uint32_t ValLo, uint32_t ValHi) const
Split the 64-bit value LHS into two 32-bit components, and perform the binary operation Opc to it wit...
SDValue lowerUnhandledCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals, StringRef Reason) const
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG, MachineFrameInfo &MFI, int ClobberedFI) const
uint32_t getImplicitParameterOffset(const MachineFunction &MF, const ImplicitParameter Param) const
Helper function that returns the byte offset of the given type of implicit parameter.
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const
SDValue loadInputValue(SelectionDAG &DAG, const TargetRegisterClass *RC, EVT VT, const SDLoc &SL, const ArgDescriptor &Arg) const
static EVT getEquivalentMemType(LLVMContext &Context, EVT VT)
SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, Register Reg, EVT VT, const SDLoc &SL, bool RawReg=false) const
Helper function that adds Reg to the LiveIn list of the DAG's MachineFunction.
SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const
Split a vector store into 2 stores of half the vector.
std::pair< SDValue, SDValue > split64BitValue(SDValue Op, SelectionDAG &DAG) const
Return 64-bit value Op as two 32-bit integers.
static CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg)
static CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg)
Selects the correct CCAssignFn for a given CallingConvention value.
static bool allUsesHaveSourceMods(const SDNode *N, unsigned CostThreshold=4)
bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const override
If SNaN is false, returns true if Op is known to never be any NaN; otherwise, returns true if Op is known to never be a signaling NaN.
static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG)
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const
static bool shouldFoldFNegIntoSrc(SDNode *FNeg, SDValue FNegSrc)
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue WidenOrSplitVectorLoad(SDValue Op, SelectionDAG &DAG) const
Widen a suitably aligned v3 load.
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Class for arbitrary precision integers.
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
This class represents an incoming formal argument to a Function.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
@ Min
*p = old <signed v ? old : v
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
Value * getPointerOperand()
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
This is an SDNode representing atomic operations.
MemoryEffects getMemoryEffects() const
Returns memory effects of the function.
LLVM Basic Block Representation.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
static ByteProvider getConstantZero()
static ByteProvider getSrc(std::optional< ISelOp > Val, int64_t ByteOffset, int64_t VectorOffset)
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
bool isAllocated(MCRegister Reg) const
isAllocated - Return true if the specified register (or an alias) is allocated.
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getArgOperand(unsigned i) const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
bool isIntPredicate() const
This is the shared class of boolean and integer constants.
This class represents an Operation in the Expression.
uint64_t getNumOperands() const
A parsed version of the target data layout string in and methods for querying it.
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Diagnostic information for unsupported feature in backend.
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Register DemoteRegister
DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg allocated to hold a pointer to ...
const Value * getValueFromVirtualReg(Register Vreg)
This method is called from TargetLowerinInfo::isSDNodeSourceOfDivergence to get the Value correspondi...
Class to represent function types.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasD16Images() const
bool hasImageStoreD16Bug() const
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
bool hasDot7Insts() const
bool hasApertureRegs() const
bool hasFlatInstOffsets() const
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasBCNT(unsigned Size) const
bool hasMultiDwordFlatScratchAddressing() const
bool hasArchitectedSGPRs() const
bool hasDenormModeInst() const
bool hasUnalignedDSAccessEnabled() const
const SIInstrInfo * getInstrInfo() const override
bool hasAtomicFaddRtnInsts() const
Align getStackAlignment() const
bool enableFlatScratch() const
bool hasDwordx3LoadStores() const
bool hasFlatScrRegister() const
bool supportsGetDoorbellID() const
bool hasFlatAtomicFaddF32Inst() const
const SIRegisterInfo * getRegisterInfo() const override
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
bool hasLDSMisalignedBug() const
bool hasUserSGPRInit16Bug() const
TrapHandlerAbi getTrapHandlerAbi() const
bool hasUnalignedScratchAccess() const
bool hasLDSFPAtomicAdd() const
bool hasMin3Max3_16() const
bool hasGFX10_AEncoding() const
bool hasPackedFP32Ops() const
bool hasGFX940Insts() const
bool hasFullRate64Ops() const
bool isTrapHandlerEnabled() const
bool hasFlatGlobalInsts() const
bool getScalarizeGlobalBehavior() const
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
bool hasNSAEncoding() const
bool usePRTStrictNull() const
bool hasUnalignedBufferAccessEnabled() const
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
bool hasImageGather4D16Bug() const
bool supportsMinMaxDenormModes() const
bool hasAtomicFaddInsts() const
bool hasAtomicFaddNoRtnInsts() const
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
bool hasDS96AndDS128() const
bool useFlatForGlobal() const
Generation getGeneration() const
bool hasUnpackedD16VMem() const
bool hasFmaMixInsts() const
bool hasPackedTID() const
unsigned getNSAMaxSize() const
bool hasAddNoCarry() const
bool hasGWSAutoReplay() const
virtual void computeKnownBitsImpl(Register R, KnownBits &Known, const APInt &DemandedElts, unsigned Depth=0)
const MachineFunction & getMachineFunction() const
bool hasExternalLinkage() const
unsigned getAddressSpace() const
Module * getParent()
Get the module that this global value is contained inside of...
Type * getValueType() const
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
const BasicBlock * getParent() const
void getAllMetadata(SmallVectorImpl< std::pair< unsigned, MDNode * > > &MDs) const
Get all metadata attached to this Instruction.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
This is an important class for using LLVM in a threaded context.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
This class is used to represent ISD::LOAD nodes.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Describe properties that are true of each instruction in the target description file.
bool isCompare() const
Return true if this instruction is a comparison.
bool hasImplicitDefOfPhysReg(unsigned Reg, const MCRegisterInfo *MRI=nullptr) const
Return true if this instruction implicitly defines the specified physical register.
Wrapper class representing physical registers. Should be passed by value.
Helper class for constructing bundles of MachineInstrs.
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
iterator getFirstNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the first non-debug instruction in the basic block, or end().
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Align getAlignment() const
Return alignment of the basic block.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
bool hasCalls() const
Return true if the current function has any function calls.
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
bool hasStackObjects() const
Return true if there are any stack objects in this function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
void push_back(MachineBasicBlock *MBB)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
static MachineOperand CreateImm(int64_t Val)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
void setType(Register VReg, LLT Ty)
Set the low-level type of VReg to Ty.
An SDNode that represents everything that will be needed to construct a MachineInstr.
This is an abstract virtual class for memory operations.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
bool onlyWritesMemory() const
Whether this function only (at most) writes memory.
bool doesNotAccessMemory() const
Whether this function accesses no memory.
bool onlyReadsMemory() const
Whether this function only (at most) reads memory.
A Module instance is used to store all the information related to an LLVM module.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
const SDValue & getOperand(unsigned Num) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
static use_iterator use_end()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
const SDValue & getOperand(unsigned i) const
unsigned getMachineOpcode() const
unsigned getOpcode() const
static bool isLegalMUBUFImmOffset(unsigned Imm)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static unsigned getMaxMUBUFImmOffset()
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns if Offset is legal for the subtarget as the offset to a FLAT encoded instruction.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
SIModeRegisterDefaults getMode() const
std::tuple< const ArgDescriptor *, const TargetRegisterClass *, LLT > getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const
unsigned getBytesInStackArgArea() const
const AMDGPUGWSResourcePseudoSourceValue * getGWSPSV(const AMDGPUTargetMachine &TM)
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
static bool isVGPRClass(const TargetRegisterClass *RC)
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isTypeDesirableForOp(unsigned Op, EVT VT) const override
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDNode * PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override
Fold the instructions after selecting them.
SDValue splitTernaryVectorOp(SDValue Op, SelectionDAG &DAG) const
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
MachineSDNode * wrapAddr64Rsrc(SelectionDAG &DAG, const SDLoc &DL, SDValue Ptr) const
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, const TargetRegisterInfo *TRI, const TargetInstrInfo *TII, unsigned &PhysReg, int &Cost) const override
Allows the target to handle physreg-carried dependency in target-specific way.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool requiresUniformRegister(MachineFunction &MF, const Value *V) const override
Allows target to decide about the register class of the specific value that is live outside the defin...
bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const override
Returns true if be combined with to form an ISD::FMAD.
AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass into.
void bundleInstWithWaitcnt(MachineInstr &MI) const
Insert MI into a BUNDLE with an S_WAITCNT 0 immediately following it.
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override
Return the type to use for a scalar shift opcode, given the shifted amount type.
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
MVT getPointerTy(const DataLayout &DL, unsigned AS) const override
Map address space 7 to MVT::v5i32 because that's its in-memory representation.
bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const
void insertCopiesSplitCSR(MachineBasicBlock *Entry, const SmallVectorImpl< MachineBasicBlock * > &Exits) const override
Insert explicit copies in entry and exit blocks.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
SDNode * legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const
Legalize target independent instructions (e.g.
bool allowsMisalignedMemoryAccessesImpl(unsigned Size, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *IsFast=nullptr) const
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
const GCNSubtarget * getSubtarget() const
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
bool shouldEmitGOTReloc(const GlobalValue *GV) const
bool isCanonicalized(SelectionDAG &DAG, SDValue Op, unsigned MaxDepth=5) const
bool hasAtomicFaddRtnForTy(SDValue &Op) const
SDValue splitUnaryVectorOp(SDValue Op, SelectionDAG &DAG) const
void AddIMGInit(MachineInstr &MI) const
void allocateSpecialInputSGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
SDValue lowerDYNAMIC_STACKALLOCImpl(SDValue Op, SelectionDAG &DAG) const
bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const override
void allocateHSAUserSGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent) const override
Return the register class that should be used for the specified value type.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
bool isLegalGlobalAddressingMode(const AddrMode &AM) const
void computeKnownBitsForFrameIndex(int FrameIdx, KnownBits &Known, const MachineFunction &MF) const override
Determine which of the bits of FrameIndex FIOp are known to be 0.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
bool getAsmOperandConstVal(SDValue Op, uint64_t &Val) const
bool isShuffleMaskLegal(ArrayRef< int >, EVT) const override
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
LLT getPreferredShiftAmountTy(LLT Ty) const override
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool isMemOpUniform(const SDNode *N) const
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
void allocateSpecialInputVGPRsFixed(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
Allocate implicit function VGPR arguments in fixed registers.
LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override
On some platforms, an AtomicRMW that never actually modifies the value (such as fetch_add of 0) can b...
MachineBasicBlock * emitGWSMemViolTestLoop(MachineInstr &MI, MachineBasicBlock *BB) const
bool checkAsmConstraintValA(SDValue Op, uint64_t Val, unsigned MaxSize=64) const
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool shouldEmitFixup(const GlobalValue *GV) const
MachineBasicBlock * splitKillBlock(MachineInstr &MI, MachineBasicBlock *BB) const
bool hasMemSDNodeUser(SDNode *N) const
bool isSDNodeSourceOfDivergence(const SDNode *N, FunctionLoweringInfo *FLI, UniformityInfo *UA) const override
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isEligibleForTailCallOptimization(SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SmallVectorImpl< ISD::InputArg > &Ins, SelectionDAG &DAG) const
bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const
SDValue LowerCallResult(SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals, bool isThisReturn, SDValue ThisVal) const
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const override
If SNaN is false, returns true if Op is known to never be any NaN; if SNaN is true, returns whether Op is known to never be a signaling NaN.
AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode, EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation input to an Opcode operation is free (for instance,...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
Assign the register class depending on the number of bits set in the writemask.
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void allocateSpecialInputVGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
Allocate implicit function VGPR arguments at the end of allocated user arguments.
void finalizeLowering(MachineFunction &MF) const override
Execute target specific actions to finalize target lowering.
static bool isNonGlobalAddrSpace(unsigned AS)
MachineSDNode * buildRSRC(SelectionDAG &DAG, const SDLoc &DL, SDValue Ptr, uint32_t RsrcDword1, uint64_t RsrcDword2And3) const
Return a resource descriptor with the 'Add TID' bit enabled The TID (Thread ID) is multiplied by the ...
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
bool mayBeEmittedAsTailCall(const CallInst *) const override
Return true if the target may be able to emit the call instruction as a tail call.
void passSpecialInputs(CallLoweringInfo &CLI, CCState &CCInfo, const SIMachineFunctionInfo &Info, SmallVectorImpl< std::pair< unsigned, SDValue > > &RegsToPass, SmallVectorImpl< SDValue > &MemOpChains, SDValue Chain) const
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform an atomicrmw expansion using a target-specific way.
static bool shouldExpandVectorDynExt(unsigned EltSize, unsigned NumElem, bool IsDivergentIdx, const GCNSubtarget *Subtarget)
Check if EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT (<n x e>, var-idx) should be expanded into a set of cmp...
bool shouldUseLDSConstAddress(const GlobalValue *GV) const
bool supportSplitCSR(MachineFunction *MF) const override
Return true if the target supports that a subset of CSRs for the given machine function is handled ex...
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
bool checkAsmConstraintVal(SDValue Op, const std::string &Constraint, uint64_t Val) const
bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *IsFast=nullptr) const override
LLT handling variant.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const override
Returns if it's reasonable to merge stores to MemVT size.
SITargetLowering(const TargetMachine &tm, const GCNSubtarget &STI)
bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
bool shouldEmitPCReloc(const GlobalValue *GV) const
void initializeSplitCSR(MachineBasicBlock *Entry) const override
Perform necessary initialization to handle a subset of CSRs explicitly via copies.
void allocateSpecialEntryInputVGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, const SDLoc &DL, SDValue V) const
bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, MachineFunction &MF, unsigned IntrinsicID) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue splitBinaryVectorOp(SDValue Op, SelectionDAG &DAG) const
bool getAddrModeArguments(IntrinsicInst *, SmallVectorImpl< Value * > &, Type *&) const override
CodeGenPrepare sinks address calculations into the same BB as Load/Store instructions reading the add...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const override
Determine the known alignment for the pointer value R.
MVT getPointerMemTy(const DataLayout &DL, unsigned AS) const override
Similarly, the in-memory representation of a p7 is {p8, i32}, aka v8i32 when padding is added.
void allocateSystemSGPRs(CCState &CCInfo, MachineFunction &MF, SIMachineFunctionInfo &Info, CallingConv::ID CallConv, bool IsShader) const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS)
Return an AddrSpaceCastSDNode.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const Pass * getPass() const
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
const TargetMachine & getTarget() const
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringRef - Represent a constant reference to a string, i.e.
constexpr size_t size() const
size - Get the string size.
bool startswith(StringRef Prefix) const
bool endswith(StringRef Suffix) const
const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Information about stack frame layout on the target.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
@ ZeroOrOneBooleanContent
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
bool shouldAssumeDSOLocal(const Module &M, const GlobalValue *GV) const
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
unsigned getID() const
Return the register class ID number.
MCRegister getRegister(unsigned i) const
Return the specified register in the class.
int getCopyCost() const
Return the cost of copying a value between two registers in this class.
iterator begin() const
begin/end - Return all of the registers in this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
OSType getOS() const
Get the parsed operating system type of this triple.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize Fixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isFunctionTy() const
True if this is an instance of FunctionType.
const fltSemantics & getFltSemantics() const
bool isVoidTy() const
Return true if this is 'void'.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
void takeName(Value *V)
Transfer the name from V to this value.
constexpr bool isZero() const
Implementation for an ilist node.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ STREAMOUT_REGISTER
Internal address spaces. Can be freely renumbered.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ BUFFER_FAT_POINTER
Address space for 160-bit buffer fat pointers.
@ PRIVATE_ADDRESS
Address space for private memory.
@ BUFFER_RESOURCE
Address space for 128-bit buffer resources.
@ TBUFFER_LOAD_FORMAT_D16
@ TBUFFER_STORE_FORMAT_D16
@ BUFFER_STORE_FORMAT_D16
@ CLAMP
CLAMP value between 0.0 and 1.0.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width)
void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width)
LLVM_READONLY const MIMGG16MappingInfo * getMIMGG16MappingInfo(unsigned G)
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getGlobalSaddrOp(uint16_t Opcode)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool shouldEmitConstantsToTextSection(const Triple &TT)
bool isFlatGlobalAddrSpace(unsigned AS)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
unsigned getCodeObjectVersion(const Module &M)
bool isEntryFunctionCC(CallingConv::ID CC)
LLVM_READNONE bool isKernel(CallingConv::ID CC)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isShader(CallingConv::ID cc)
bool isGFX10Plus(const MCSubtargetInfo &STI)
LLVM_READONLY int getVOPe64(uint16_t Opcode)
bool isExtendedGlobalAddrSpace(unsigned AS)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfo(unsigned DimEnum)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
const RsrcIntrinsic * lookupRsrcIntrinsic(unsigned Intr)
bool isGraphics(CallingConv::ID cc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ MaxID
The highest possible ID. Must be some 2^k - 1.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
@ BSWAP
Byte Swap and Counting operators.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ ATOMIC_CMP_SWAP_WITH_SUCCESS
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
@ FADD
Simple binary floating point operators.
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SIGN_EXTEND
Conversion operators.
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ BR
Control flow instructions. These all have token chains.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ SMULO
Same for multiplication.
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
@ INLINEASM
INLINEASM - Represents an inline asm block.
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ System
Synchronized with respect to all concurrently executing threads.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred)
getICmpCondCode - Return the ISD condition code corresponding to the given LLVM IR integer condition ...
void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
int popcount(T Value) noexcept
Count the number of set bits in a value.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
testing::Matcher< const detail::ErrorHolder & > Failed()
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
bool isReleaseOrStronger(AtomicOrdering AO)
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred)
getFCmpCondCode - Return the ISD condition code corresponding to the given LLVM IR floating-point con...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
DWARFExpression::Operation Op
@ TowardPositive
roundTowardPositive.
@ TowardNegative
roundTowardNegative.
unsigned M0(unsigned Val)
int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
ArrayRef(const T &OneElt) -> ArrayRef< T >
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static const fltSemantics & IEEEsingle() LLVM_READNONE
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEhalf() LLVM_READNONE
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
static constexpr ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
static constexpr ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask)
static constexpr ArgDescriptor createStack(unsigned Offset, unsigned Mask=~0u)
Helper struct shared between Function Specialization and SCCP Solver.
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getPreserveSign()
static constexpr DenormalMode getIEEE()
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
bool isInteger() const
Return true if this is an integer or a vector integer type.
void resetAll()
Resets the known state of all bits.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasAllowContract() const
void setNoUnsignedWrap(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
bool DX10Clamp
Used by the vector ALU to force DX10-style treatment of NaNs: when set, clamp NaN to zero; otherwise,...
DenormalMode FP64FP16Denormals
If this is set, neither input or output denormals are flushed for both f64 and f16/v2f16 instructions...
DenormalMode FP32Denormals
If this is set, neither input or output denormals are flushed for most f32 instructions.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals